summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/9p/v9fs.c 5
-rw-r--r-- fs/9p/vfs_file.c 4
-rw-r--r-- fs/Kconfig.binfmt 8
-rw-r--r-- fs/affs/amigaffs.c 63
-rw-r--r-- fs/affs/file.c 26
-rw-r--r-- fs/binfmt_elf.c 14
-rw-r--r-- fs/block_dev.c 22
-rw-r--r-- fs/btrfs/Kconfig 2
-rw-r--r-- fs/btrfs/backref.c 1
-rw-r--r-- fs/btrfs/compression.c 2
-rw-r--r-- fs/btrfs/ctree.c 16
-rw-r--r-- fs/btrfs/ctree.h 2
-rw-r--r-- fs/btrfs/disk-io.c 2
-rw-r--r-- fs/btrfs/export.c 8
-rw-r--r-- fs/btrfs/export.h 5
-rw-r--r-- fs/btrfs/extent-tree.c 2
-rw-r--r-- fs/btrfs/extent_io.c 54
-rw-r--r-- fs/btrfs/extent_io.h 6
-rw-r--r-- fs/btrfs/file-item.c 6
-rw-r--r-- fs/btrfs/file.c 4
-rw-r--r-- fs/btrfs/free-space-cache.c 10
-rw-r--r-- fs/btrfs/inode.c 83
-rw-r--r-- fs/btrfs/ioctl.c 33
-rw-r--r-- fs/btrfs/qgroup.c 6
-rw-r--r-- fs/btrfs/relocation.c 10
-rw-r--r-- fs/btrfs/scrub.c 17
-rw-r--r-- fs/btrfs/send.c 67
-rw-r--r-- fs/btrfs/super.c 18
-rw-r--r-- fs/btrfs/tests/btrfs-tests.c 8
-rw-r--r-- fs/btrfs/tests/inode-tests.c 1
-rw-r--r-- fs/btrfs/transaction.c 6
-rw-r--r-- fs/btrfs/tree-log.c 56
-rw-r--r-- fs/btrfs/tree-log.h 2
-rw-r--r-- fs/btrfs/volumes.c 25
-rw-r--r-- fs/cachefiles/rdwr.c 5
-rw-r--r-- fs/ceph/addr.c 12
-rw-r--r-- fs/ceph/caps.c 36
-rw-r--r-- fs/ceph/export.c 5
-rw-r--r-- fs/ceph/file.c 1
-rw-r--r-- fs/ceph/mds_client.c 14
-rw-r--r-- fs/char_dev.c 86
-rw-r--r-- fs/cifs/asn1.c 16
-rw-r--r-- fs/cifs/cifs_unicode.c 17
-rw-r--r-- fs/cifs/cifsfs.c 2
-rw-r--r-- fs/cifs/cifssmb.c 12
-rw-r--r-- fs/cifs/connect.c 23
-rw-r--r-- fs/cifs/dir.c 22
-rw-r--r-- fs/cifs/file.c 3
-rw-r--r-- fs/cifs/sess.c 2
-rw-r--r-- fs/cifs/smb2misc.c 4
-rw-r--r-- fs/cifs/smb2ops.c 14
-rw-r--r-- fs/cifs/smb2pdu.c 12
-rw-r--r-- fs/direct-io.c 5
-rw-r--r-- fs/dlm/debug_fs.c 1
-rw-r--r-- fs/dlm/dlm_internal.h 1
-rw-r--r-- fs/dlm/lockspace.c 6
-rw-r--r-- fs/dlm/lowcomms.c 2
-rw-r--r-- fs/ecryptfs/crypto.c 6
-rw-r--r-- fs/ecryptfs/main.c 6
-rw-r--r-- fs/efivarfs/inode.c 2
-rw-r--r-- fs/efivarfs/super.c 3
-rw-r--r-- fs/eventpoll.c 94
-rw-r--r-- fs/exec.c 6
-rw-r--r-- fs/ext2/balloc.c 14
-rw-r--r-- fs/ext2/ialloc.c 3
-rw-r--r-- fs/ext4/block_validity.c 116
-rw-r--r-- fs/ext4/dir.c 6
-rw-r--r-- fs/ext4/ext4.h 24
-rw-r--r-- fs/ext4/ext4_extents.h 9
-rw-r--r-- fs/ext4/extents.c 99
-rw-r--r-- fs/ext4/extents_status.c 4
-rw-r--r-- fs/ext4/ialloc.c 61
-rw-r--r-- fs/ext4/indirect.c 6
-rw-r--r-- fs/ext4/inline.c 7
-rw-r--r-- fs/ext4/inode.c 74
-rw-r--r-- fs/ext4/ioctl.c 5
-rw-r--r-- fs/ext4/mballoc.c 11
-rw-r--r-- fs/ext4/namei.c 168
-rw-r--r-- fs/ext4/resize.c 5
-rw-r--r-- fs/ext4/super.c 49
-rw-r--r-- fs/ext4/xattr.c 66
-rw-r--r-- fs/f2fs/checkpoint.c 2
-rw-r--r-- fs/f2fs/dir.c 19
-rw-r--r-- fs/f2fs/file.c 3
-rw-r--r-- fs/f2fs/namei.c 6
-rw-r--r-- fs/fat/inode.c 6
-rw-r--r-- fs/file.c 2
-rw-r--r-- fs/fs-writeback.c 120
-rw-r--r-- fs/fuse/cuse.c 2
-rw-r--r-- fs/fuse/dev.c 24
-rw-r--r-- fs/fuse/file.c 12
-rw-r--r-- fs/gfs2/file.c 5
-rw-r--r-- fs/gfs2/glock.c 8
-rw-r--r-- fs/gfs2/lock_dlm.c 13
-rw-r--r-- fs/gfs2/ops_fstype.c 20
-rw-r--r-- fs/gfs2/rgrp.c 9
-rw-r--r-- fs/hfs/bfind.c 14
-rw-r--r-- fs/hfs/bnode.c 25
-rw-r--r-- fs/hfs/btree.h 7
-rw-r--r-- fs/hfs/super.c 10
-rw-r--r-- fs/hugetlbfs/inode.c 7
-rw-r--r-- fs/isofs/dir.c 1
-rw-r--r-- fs/isofs/namei.c 1
-rw-r--r-- fs/jbd2/transaction.c 26
-rw-r--r-- fs/jffs2/compr_rtime.c 3
-rw-r--r-- fs/jffs2/dir.c 6
-rw-r--r-- fs/jffs2/readinode.c 16
-rw-r--r-- fs/jffs2/scan.c 2
-rw-r--r-- fs/jffs2/summary.c 3
-rw-r--r-- fs/jfs/inode.c 3
-rw-r--r-- fs/jfs/jfs_dmap.c 2
-rw-r--r-- fs/jfs/jfs_dmap.h 2
-rw-r--r-- fs/jfs/jfs_filsys.h 1
-rw-r--r-- fs/jfs/jfs_logmgr.c 1
-rw-r--r-- fs/jfs/jfs_mount.c 10
-rw-r--r-- fs/libfs.c 6
-rw-r--r-- fs/lockd/host.c 20
-rw-r--r-- fs/minix/inode.c 36
-rw-r--r-- fs/minix/itree_common.c 8
-rw-r--r-- fs/namespace.c 44
-rw-r--r-- fs/nfs/Kconfig 2
-rw-r--r-- fs/nfs/client.c 2
-rw-r--r-- fs/nfs/dir.c 3
-rw-r--r-- fs/nfs/filelayout/filelayout.c 2
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayout.c 13
-rw-r--r-- fs/nfs/inode.c 10
-rw-r--r-- fs/nfs/internal.h 12
-rw-r--r-- fs/nfs/namespace.c 12
-rw-r--r-- fs/nfs/nfs3acl.c 22
-rw-r--r-- fs/nfs/nfs3proc.c 4
-rw-r--r-- fs/nfs/nfs3xdr.c 3
-rw-r--r-- fs/nfs/nfs42proc.c 5
-rw-r--r-- fs/nfs/nfs4file.c 2
-rw-r--r-- fs/nfs/nfs4proc.c 89
-rw-r--r-- fs/nfs/nfs4xdr.c 6
-rw-r--r-- fs/nfs/pagelist.c 12
-rw-r--r-- fs/nfs_common/grace.c 6
-rw-r--r-- fs/nfsd/nfs3xdr.c 7
-rw-r--r-- fs/nfsd/nfs4callback.c 2
-rw-r--r-- fs/nfsd/nfs4xdr.c 19
-rw-r--r-- fs/nfsd/nfsctl.c 5
-rw-r--r-- fs/nilfs2/segment.c 2
-rw-r--r-- fs/nilfs2/sysfs.c 27
-rw-r--r-- fs/ntfs/inode.c 14
-rw-r--r-- fs/ocfs2/file.c 74
-rw-r--r-- fs/ocfs2/ocfs2.h 4
-rw-r--r-- fs/ocfs2/ocfs2_fs.h 4
-rw-r--r-- fs/ocfs2/suballoc.c 13
-rw-r--r-- fs/ocfs2/super.c 19
-rw-r--r-- fs/overlayfs/copy_up.c 2
-rw-r--r-- fs/overlayfs/dir.c 10
-rw-r--r-- fs/pipe.c 17
-rw-r--r-- fs/pnode.c 9
-rw-r--r-- fs/proc/base.c 15
-rw-r--r-- fs/proc/inode.c 2
-rw-r--r-- fs/proc/self.c 9
-rw-r--r-- fs/proc/thread_self.c 2
-rw-r--r-- fs/qnx4/dir.c 69
-rw-r--r-- fs/quota/quota_tree.c 8
-rw-r--r-- fs/quota/quota_v2.c 1
-rw-r--r-- fs/reiserfs/inode.c 9
-rw-r--r-- fs/reiserfs/journal.c 14
-rw-r--r-- fs/reiserfs/stree.c 27
-rw-r--r-- fs/reiserfs/super.c 16
-rw-r--r-- fs/reiserfs/xattr.c 7
-rw-r--r-- fs/reiserfs/xattr.h 2
-rw-r--r-- fs/romfs/storage.c 4
-rw-r--r-- fs/seq_file.c 4
-rw-r--r-- fs/squashfs/export.c 45
-rw-r--r-- fs/squashfs/file.c 6
-rw-r--r-- fs/squashfs/id.c 42
-rw-r--r-- fs/squashfs/squashfs_fs.h 1
-rw-r--r-- fs/squashfs/squashfs_fs_sb.h 1
-rw-r--r-- fs/squashfs/super.c 6
-rw-r--r-- fs/squashfs/xattr.h 10
-rw-r--r-- fs/squashfs/xattr_id.c 68
-rw-r--r-- fs/sysfs/file.c 55
-rw-r--r-- fs/timerfd.c 10
-rw-r--r-- fs/ubifs/debug.c 1
-rw-r--r-- fs/ubifs/io.c 29
-rw-r--r-- fs/udf/inode.c 25
-rw-r--r-- fs/udf/misc.c 13
-rw-r--r-- fs/udf/namei.c 4
-rw-r--r-- fs/udf/super.c 6
-rw-r--r-- fs/ufs/super.c 2
-rw-r--r-- fs/xattr.c 4
-rw-r--r-- fs/xfs/libxfs/xfs_attr_leaf.c 13
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.c 2
-rw-r--r-- fs/xfs/xfs_icache.c 58
-rw-r--r-- fs/xfs/xfs_iops.c 2
-rw-r--r-- fs/xfs/xfs_pnfs.c 2
-rw-r--r-- fs/xfs/xfs_rtalloc.c 21
-rw-r--r-- fs/xfs/xfs_sysfs.h 6
-rw-r--r-- fs/xfs/xfs_trans_dquot.c 2
194 files changed, 2363 insertions, 937 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 1e9bb8db7b48..3a56f4fa59f9 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -457,10 +457,9 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
}
#ifdef CONFIG_9P_FSCACHE
- if (v9ses->fscache) {
+ if (v9ses->fscache)
v9fs_cache_session_put_cookie(v9ses);
- kfree(v9ses->cachetag);
- }
+ kfree(v9ses->cachetag);
#endif
kfree(v9ses->uname);
kfree(v9ses->aname);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 9dbf37147126..b5aa3e005b9e 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -624,9 +624,9 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
.sync_mode = WB_SYNC_ALL,
- .range_start = vma->vm_pgoff * PAGE_SIZE,
+ .range_start = (loff_t)vma->vm_pgoff * PAGE_SIZE,
/* absolute end, byte at end included */
- .range_end = vma->vm_pgoff * PAGE_SIZE +
+ .range_end = (loff_t)vma->vm_pgoff * PAGE_SIZE +
(vma->vm_end - vma->vm_start - 1),
};
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 2d0cbbd14cfc..72c03354c14b 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -1,6 +1,7 @@
config BINFMT_ELF
bool "Kernel support for ELF binaries"
depends on MMU && (BROKEN || !FRV)
+ select ELFCORE
default y
---help---
ELF (Executable and Linkable Format) is a format for libraries and
@@ -26,6 +27,7 @@ config BINFMT_ELF
config COMPAT_BINFMT_ELF
bool
depends on COMPAT && BINFMT_ELF
+ select ELFCORE
config ARCH_BINFMT_ELF_STATE
bool
@@ -34,6 +36,7 @@ config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y
depends on (FRV || BLACKFIN || (SUPERH32 && !MMU) || C6X)
+ select ELFCORE
help
ELF FDPIC binaries are based on ELF, but allow the individual load
segments of a binary to be located in memory independently of each
@@ -43,6 +46,11 @@ config BINFMT_ELF_FDPIC
It is also possible to run FDPIC ELF binaries on MMU linux also.
+config ELFCORE
+ bool
+ help
+ This option enables kernel/elfcore.o.
+
config CORE_DUMP_DEFAULT_ELF_HEADERS
bool "Write ELF core dumps with partial segments"
default y
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 5fa92bc790ef..c1b344e56e85 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -390,23 +390,23 @@ prot_to_mode(u32 prot)
umode_t mode = 0;
if (!(prot & FIBF_NOWRITE))
- mode |= S_IWUSR;
+ mode |= 0200;
if (!(prot & FIBF_NOREAD))
- mode |= S_IRUSR;
+ mode |= 0400;
if (!(prot & FIBF_NOEXECUTE))
- mode |= S_IXUSR;
+ mode |= 0100;
if (prot & FIBF_GRP_WRITE)
- mode |= S_IWGRP;
+ mode |= 0020;
if (prot & FIBF_GRP_READ)
- mode |= S_IRGRP;
+ mode |= 0040;
if (prot & FIBF_GRP_EXECUTE)
- mode |= S_IXGRP;
+ mode |= 0010;
if (prot & FIBF_OTR_WRITE)
- mode |= S_IWOTH;
+ mode |= 0002;
if (prot & FIBF_OTR_READ)
- mode |= S_IROTH;
+ mode |= 0004;
if (prot & FIBF_OTR_EXECUTE)
- mode |= S_IXOTH;
+ mode |= 0001;
return mode;
}
@@ -417,24 +417,51 @@ mode_to_prot(struct inode *inode)
u32 prot = AFFS_I(inode)->i_protect;
umode_t mode = inode->i_mode;
- if (!(mode & S_IXUSR))
+ /*
+ * First, clear all RWED bits for owner, group, other.
+ * Then, recalculate them afresh.
+ *
+ * We'll always clear the delete-inhibit bit for the owner, as that is
+ * the classic single-user mode AmigaOS protection bit and we need to
+ * stay compatible with all scenarios.
+ *
+ * Since multi-user AmigaOS is an extension, we'll only set the
+ * delete-allow bit if any of the other bits in the same user class
+ * (group/other) are used.
+ */
+ prot &= ~(FIBF_NOEXECUTE | FIBF_NOREAD
+ | FIBF_NOWRITE | FIBF_NODELETE
+ | FIBF_GRP_EXECUTE | FIBF_GRP_READ
+ | FIBF_GRP_WRITE | FIBF_GRP_DELETE
+ | FIBF_OTR_EXECUTE | FIBF_OTR_READ
+ | FIBF_OTR_WRITE | FIBF_OTR_DELETE);
+
+ /* Classic single-user AmigaOS flags. These are inverted. */
+ if (!(mode & 0100))
prot |= FIBF_NOEXECUTE;
- if (!(mode & S_IRUSR))
+ if (!(mode & 0400))
prot |= FIBF_NOREAD;
- if (!(mode & S_IWUSR))
+ if (!(mode & 0200))
prot |= FIBF_NOWRITE;
- if (mode & S_IXGRP)
+
+ /* Multi-user extended flags. Not inverted. */
+ if (mode & 0010)
prot |= FIBF_GRP_EXECUTE;
- if (mode & S_IRGRP)
+ if (mode & 0040)
prot |= FIBF_GRP_READ;
- if (mode & S_IWGRP)
+ if (mode & 0020)
prot |= FIBF_GRP_WRITE;
- if (mode & S_IXOTH)
+ if (mode & 0070)
+ prot |= FIBF_GRP_DELETE;
+
+ if (mode & 0001)
prot |= FIBF_OTR_EXECUTE;
- if (mode & S_IROTH)
+ if (mode & 0004)
prot |= FIBF_OTR_READ;
- if (mode & S_IWOTH)
+ if (mode & 0002)
prot |= FIBF_OTR_WRITE;
+ if (mode & 0007)
+ prot |= FIBF_OTR_DELETE;
AFFS_I(inode)->i_protect = prot;
}
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 659c579c4588..38e0fd4caf2b 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -426,6 +426,24 @@ static int affs_write_begin(struct file *file, struct address_space *mapping,
return ret;
}
+static int affs_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned int len, unsigned int copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = mapping->host;
+ int ret;
+
+ ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+ /* Clear Archived bit on file writes, as AmigaOS would do */
+ if (AFFS_I(inode)->i_protect & FIBF_ARCHIVED) {
+ AFFS_I(inode)->i_protect &= ~FIBF_ARCHIVED;
+ mark_inode_dirty(inode);
+ }
+
+ return ret;
+}
+
static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
{
return generic_block_bmap(mapping,block,affs_get_block);
@@ -435,7 +453,7 @@ const struct address_space_operations affs_aops = {
.readpage = affs_readpage,
.writepage = affs_writepage,
.write_begin = affs_write_begin,
- .write_end = generic_write_end,
+ .write_end = affs_write_end,
.direct_IO = affs_direct_IO,
.bmap = _affs_bmap
};
@@ -793,6 +811,12 @@ done:
if (tmp > inode->i_size)
inode->i_size = AFFS_I(inode)->mmu_private = tmp;
+ /* Clear Archived bit on file writes, as AmigaOS would do */
+ if (AFFS_I(inode)->i_protect & FIBF_ARCHIVED) {
+ AFFS_I(inode)->i_protect &= ~FIBF_ARCHIVED;
+ mark_inode_dirty(inode);
+ }
+
err_first_bh:
unlock_page(page);
page_cache_release(page);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f1f32e55d877..eddf5746cf51 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1097,6 +1097,18 @@ static int load_elf_binary(struct linux_binprm *bprm)
current->mm->start_stack = bprm->p;
if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
+ /*
+ * For architectures with ELF randomization, when executing
+ * a loader directly (i.e. no interpreter listed in ELF
+ * headers), move the brk area out of the mmap region
+ * (since it grows up, and may collide early with the stack
+ * growing down), and into the unused ELF_ET_DYN_BASE region.
+ */
+ if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
+ loc->elf_ex.e_type == ET_DYN && !interpreter)
+ current->mm->brk = current->mm->start_brk =
+ ELF_ET_DYN_BASE;
+
current->mm->brk = current->mm->start_brk =
arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
@@ -1714,7 +1726,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
(!regset->active || regset->active(t->task, regset) > 0)) {
int ret;
size_t size = regset->n * regset->size;
- void *data = kmalloc(size, GFP_KERNEL);
+ void *data = kzalloc(size, GFP_KERNEL);
if (unlikely(!data))
return 0;
ret = regset->get(t->task, regset,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 26bbaaefdff4..a71d442ef7d0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1181,10 +1181,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
*/
if (!for_part) {
ret = devcgroup_inode_permission(bdev->bd_inode, perm);
- if (ret != 0) {
- bdput(bdev);
+ if (ret != 0)
return ret;
- }
}
restart:
@@ -1253,8 +1251,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_clear;
BUG_ON(for_part);
ret = __blkdev_get(whole, mode, 1);
- if (ret)
+ if (ret) {
+ bdput(whole);
goto out_clear;
+ }
bdev->bd_contains = whole;
bdev->bd_part = disk_get_part(disk, partno);
if (!(disk->flags & GENHD_FL_UP) ||
@@ -1311,7 +1311,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
put_disk(disk);
module_put(owner);
out:
- bdput(bdev);
return ret;
}
@@ -1397,6 +1396,9 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
bdput(whole);
}
+ if (res)
+ bdput(bdev);
+
return res;
}
EXPORT_SYMBOL(blkdev_get);
@@ -1513,6 +1515,16 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
struct gendisk *disk = bdev->bd_disk;
struct block_device *victim = NULL;
+ /*
+ * Sync early if it looks like we're the last one. If someone else
+ * opens the block device between now and the decrement of bd_openers
+ * then we did a sync that we didn't need to, but that's not the end
+ * of the world and we want to avoid long (could be several minute)
+ * syncs while holding the mutex.
+ */
+ if (bdev->bd_openers == 1)
+ sync_blockdev(bdev);
+
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (for_part)
bdev->bd_part_count--;
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 80e9c18ea64f..fd6b67c40d9d 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -9,6 +9,8 @@ config BTRFS_FS
select RAID6_PQ
select XOR_BLOCKS
select SRCU
+ depends on !PPC_256K_PAGES # powerpc
+ depends on !PAGE_SIZE_256KB # hexagon
help
Btrfs is a general purpose copy-on-write filesystem with extents,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 81c5d07a2af1..228bfa19b745 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1221,6 +1221,7 @@ static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans,
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
ulist_free(*roots);
+ *roots = NULL;
return ret;
}
node = ulist_next(tmp, &uiter);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bae05c5c75ba..92601775ec5e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -290,7 +290,7 @@ static void end_compressed_bio_write(struct bio *bio)
cb->start,
cb->start + cb->len - 1,
NULL,
- bio->bi_error ? 0 : 1);
+ !cb->errors);
cb->compressed_pages[0]->mapping = NULL;
end_compressed_writeback(inode, cb);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 8eac5f75bca3..fbb4c81f6311 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1129,6 +1129,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
btrfs_abort_transaction(trans, root, ret);
return ret;
}
@@ -1136,6 +1138,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
btrfs_abort_transaction(trans, root, ret);
return ret;
}
@@ -1174,6 +1178,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (last_ref) {
ret = tree_mod_log_free_eb(root->fs_info, buf);
if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
btrfs_abort_transaction(trans, root, ret);
return ret;
}
@@ -1372,7 +1378,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
- extent_buffer_get(eb_rewin);
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
+ eb_rewin, btrfs_header_level(eb_rewin));
btrfs_tree_read_lock(eb_rewin);
__tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
@@ -1424,7 +1431,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_warn(root->fs_info,
"failed to read tree block %llu from get_old_root", logical);
} else {
+ btrfs_tree_read_lock(old);
eb = btrfs_clone_extent_buffer(old);
+ btrfs_tree_read_unlock(old);
free_extent_buffer(old);
}
} else if (old_root) {
@@ -1441,8 +1450,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
if (!eb)
return NULL;
- extent_buffer_get(eb);
- btrfs_tree_read_lock(eb);
if (old_root) {
btrfs_set_header_bytenr(eb, eb->start);
btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
@@ -1450,6 +1457,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
+ btrfs_header_level(eb));
+ btrfs_tree_read_lock(eb);
if (tm)
__tree_mod_log_rewind(root->fs_info, eb, time_seq, tm);
else
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0b06d4942da7..8fb9a1e0048b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -4096,6 +4096,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* super.c */
int btrfs_parse_options(struct btrfs_root *root, char *options);
int btrfs_sync_fs(struct super_block *sb, int wait);
+char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+ u64 subvol_objectid);
#ifdef CONFIG_PRINTK
__printf(2, 3)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 774728143b63..de63cb9bc64b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1750,7 +1750,7 @@ static int cleaner_kthread(void *arg)
*/
btrfs_delete_unused_bgs(root->fs_info);
sleep:
- if (!try_to_freeze() && !again) {
+ if (!again) {
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop())
schedule();
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 2513a7f53334..92f80ed64219 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -55,9 +55,9 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
return type;
}
-static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
- u64 root_objectid, u32 generation,
- int check_generation)
+struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+ u64 root_objectid, u32 generation,
+ int check_generation)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root;
@@ -150,7 +150,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
}
-static struct dentry *btrfs_get_parent(struct dentry *child)
+struct dentry *btrfs_get_parent(struct dentry *child)
{
struct inode *dir = d_inode(child);
struct btrfs_root *root = BTRFS_I(dir)->root;
diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
index 074348a95841..7a305e554999 100644
--- a/fs/btrfs/export.h
+++ b/fs/btrfs/export.h
@@ -16,4 +16,9 @@ struct btrfs_fid {
u64 parent_root_objectid;
} __attribute__ ((packed));
+struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+ u64 root_objectid, u32 generation,
+ int check_generation);
+struct dentry *btrfs_get_parent(struct dentry *child);
+
#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 34ffc125763f..3bb731b2156c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10688,7 +10688,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
disk_super = fs_info->super_copy;
if (!btrfs_super_root(disk_super))
- return 1;
+ return -EINVAL;
features = btrfs_super_incompat_flags(disk_super);
if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2c86c472f670..b28bc7690d4b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4000,6 +4000,10 @@ retry:
if (!ret) {
free_extent_buffer(eb);
continue;
+ } else if (ret < 0) {
+ done = 1;
+ free_extent_buffer(eb);
+ break;
}
ret = write_one_eb(eb, fs_info, wbc, &epd);
@@ -4437,6 +4441,8 @@ int try_release_extent_mapping(struct extent_map_tree *map,
/* once for us */
free_extent_map(em);
+
+ cond_resched(); /* Allow large-extent preemption. */
}
}
return try_release_extent_state(map, tree, page, mask);
@@ -4861,25 +4867,28 @@ err:
static void check_buffer_tree_ref(struct extent_buffer *eb)
{
int refs;
- /* the ref bit is tricky. We have to make sure it is set
- * if we have the buffer dirty. Otherwise the
- * code to free a buffer can end up dropping a dirty
- * page
+ /*
+ * The TREE_REF bit is first set when the extent_buffer is added
+ * to the radix tree. It is also reset, if unset, when a new reference
+ * is created by find_extent_buffer.
*
- * Once the ref bit is set, it won't go away while the
- * buffer is dirty or in writeback, and it also won't
- * go away while we have the reference count on the
- * eb bumped.
+ * It is only cleared in two cases: freeing the last non-tree
+ * reference to the extent_buffer when its STALE bit is set or
+ * calling releasepage when the tree reference is the only reference.
*
- * We can't just set the ref bit without bumping the
- * ref on the eb because free_extent_buffer might
- * see the ref bit and try to clear it. If this happens
- * free_extent_buffer might end up dropping our original
- * ref by mistake and freeing the page before we are able
- * to add one more ref.
+ * In both cases, care is taken to ensure that the extent_buffer's
+ * pages are not under io. However, releasepage can be concurrently
+ * called with creating new references, which is prone to race
+ * conditions between the calls to check_buffer_tree_ref in those
+ * codepaths and clearing TREE_REF in try_release_extent_buffer.
*
- * So bump the ref count first, then set the bit. If someone
- * beat us to it, drop the ref we added.
+ * The actual lifetime of the extent_buffer in the radix tree is
+ * adequately protected by the refcount, but the TREE_REF bit and
+ * its corresponding reference are not. To protect against this
+ * class of races, we call check_buffer_tree_ref from the codepaths
+ * which trigger io after they set eb->io_pages. Note that once io is
+ * initiated, TREE_REF can no longer be cleared, so that is the
+ * moment at which any such race is best fixed.
*/
refs = atomic_read(&eb->refs);
if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
@@ -5346,6 +5355,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0;
atomic_set(&eb->io_pages, num_reads);
+ /*
+ * It is possible for releasepage to clear the TREE_REF bit before we
+ * set io_pages. See check_buffer_tree_ref for a more detailed comment.
+ */
+ check_buffer_tree_ref(eb);
for (i = start_i; i < num_pages; i++) {
page = eb->pages[i];
if (!PageUptodate(page)) {
@@ -5421,9 +5435,9 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
}
}
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dstv,
- unsigned long start, unsigned long len)
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5444,7 +5458,7 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
cur = min(len, (PAGE_CACHE_SIZE - offset));
kaddr = page_address(page);
- if (copy_to_user(dst, kaddr + offset, cur)) {
+ if (probe_user_write(dst, kaddr + offset, cur)) {
ret = -EFAULT;
break;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 751435967724..9631be7fc9e2 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -313,9 +313,9 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dst, unsigned long start,
- unsigned long len);
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dst, unsigned long start,
+ unsigned long len);
void write_extent_buffer(struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);
void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 58ece6558430..fb5c97ea670f 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -742,10 +742,12 @@ again:
nritems = btrfs_header_nritems(path->nodes[0]);
if (!nritems || (path->slots[0] >= nritems - 1)) {
ret = btrfs_next_leaf(root, path);
- if (ret == 1)
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
found_next = 1;
- if (ret != 0)
goto insert;
+ }
slot = path->slots[0];
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 73b547f88bfc..2426dc56426f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1089,7 +1089,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
int del_nr = 0;
int del_slot = 0;
int recow;
- int ret;
+ int ret = 0;
u64 ino = btrfs_ino(inode);
path = btrfs_alloc_path();
@@ -1284,7 +1284,7 @@ again:
}
out:
btrfs_free_path(path);
- return 0;
+ return ret;
}
/*
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index e5351d9a8dfb..55d8020afc58 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -754,8 +754,10 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
while (num_entries) {
e = kmem_cache_zalloc(btrfs_free_space_cachep,
GFP_NOFS);
- if (!e)
+ if (!e) {
+ ret = -ENOMEM;
goto free_cache;
+ }
ret = io_ctl_read_entry(&io_ctl, e, &type);
if (ret) {
@@ -764,6 +766,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
if (!e->bytes) {
+ ret = -1;
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
@@ -783,6 +786,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
num_bitmaps--;
e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
if (!e->bitmap) {
+ ret = -ENOMEM;
kmem_cache_free(
btrfs_free_space_cachep, e);
goto free_cache;
@@ -2158,7 +2162,7 @@ out:
static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, bool update_stat)
{
- struct btrfs_free_space *left_info;
+ struct btrfs_free_space *left_info = NULL;
struct btrfs_free_space *right_info;
bool merged = false;
u64 offset = info->offset;
@@ -2173,7 +2177,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
if (right_info && rb_prev(&right_info->offset_index))
left_info = rb_entry(rb_prev(&right_info->offset_index),
struct btrfs_free_space, offset_index);
- else
+ else if (!right_info)
left_info = tree_search_offset(ctl, offset - 1, 0, 0);
if (right_info && !right_info->bitmap) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 548e9cd1a337..6d846ff696fb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -926,7 +926,7 @@ static noinline int cow_file_range(struct inode *inode,
u64 alloc_hint = 0;
u64 num_bytes;
unsigned long ram_size;
- u64 disk_num_bytes;
+ u64 min_alloc_size;
u64 cur_alloc_size;
u64 blocksize = root->sectorsize;
struct btrfs_key ins;
@@ -942,7 +942,6 @@ static noinline int cow_file_range(struct inode *inode,
num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
- disk_num_bytes = num_bytes;
/* if this is a small write inside eof, kick off defrag */
if (num_bytes < 64 * 1024 &&
@@ -969,18 +968,33 @@ static noinline int cow_file_range(struct inode *inode,
}
}
- BUG_ON(disk_num_bytes >
- btrfs_super_total_bytes(root->fs_info->super_copy));
+ BUG_ON(num_bytes > btrfs_super_total_bytes(root->fs_info->super_copy));
alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
- while (disk_num_bytes > 0) {
+ /*
+ * Relocation relies on the relocated extents to have exactly the same
+ * size as the original extents. Normally writeback for relocation data
+ * extents follows a NOCOW path because relocation preallocates the
+ * extents. However, due to an operation such as scrub turning a block
+ * group to RO mode, it may fallback to COW mode, so we must make sure
+ * an extent allocated during COW has exactly the requested size and can
+ * not be split into smaller extents, otherwise relocation breaks and
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = root->sectorsize;
+
+ while (num_bytes > 0) {
unsigned long op;
- cur_alloc_size = disk_num_bytes;
+ cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size,
- root->sectorsize, 0, alloc_hint,
+ min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
@@ -1033,7 +1047,7 @@ static noinline int cow_file_range(struct inode *inode,
goto out_drop_extent_cache;
}
- if (disk_num_bytes < cur_alloc_size)
+ if (num_bytes < cur_alloc_size)
break;
/* we're not doing compressed IO, don't unlock the first
@@ -1050,8 +1064,10 @@ static noinline int cow_file_range(struct inode *inode,
start + ram_size - 1, locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC,
op);
- disk_num_bytes -= cur_alloc_size;
- num_bytes -= cur_alloc_size;
+ if (num_bytes < cur_alloc_size)
+ num_bytes = 0;
+ else
+ num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
}
@@ -5354,11 +5370,13 @@ no_delete:
}
/*
- * this returns the key found in the dir entry in the location pointer.
+ * Return the key found in the dir entry in the location pointer, fill @type
+ * with BTRFS_FT_*, and return 0.
+ *
* If no dir entries were found, location->objectid is 0.
*/
static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
- struct btrfs_key *location)
+ struct btrfs_key *location, u8 *type)
{
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
@@ -5380,6 +5398,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
goto out_err;
btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
+ if (!ret)
+ *type = btrfs_dir_type(path->nodes[0], di);
out:
btrfs_free_path(path);
return ret;
@@ -5665,19 +5685,25 @@ static struct inode *new_simple_dir(struct super_block *s,
return inode;
}
+static inline u8 btrfs_inode_type(struct inode *inode)
+{
+ return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
+}
+
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
struct inode *inode;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *sub_root = root;
struct btrfs_key location;
+ u8 di_type = 0;
int index;
int ret = 0;
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- ret = btrfs_inode_by_name(dir, dentry, &location);
+ ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
if (ret < 0)
return ERR_PTR(ret);
@@ -5686,6 +5712,18 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
if (location.type == BTRFS_INODE_ITEM_KEY) {
inode = btrfs_iget(dir->i_sb, &location, root, NULL);
+ if (IS_ERR(inode))
+ return inode;
+
+ /* Do extra check against inode mode with di_type */
+ if (btrfs_inode_type(inode) != di_type) {
+ btrfs_crit(root->fs_info,
+"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
+ inode->i_mode, btrfs_inode_type(inode),
+ di_type);
+ iput(inode);
+ return ERR_PTR(-EUCLEAN);
+ }
return inode;
}
@@ -6299,11 +6337,6 @@ fail:
return ERR_PTR(ret);
}
-static inline u8 btrfs_inode_type(struct inode *inode)
-{
- return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
-}
-
/*
* utility function to add 'inode' into 'parent_inode' with
* a give name and a given sequence number.
@@ -6888,6 +6921,14 @@ again:
extent_start = found_key.offset;
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ /* Only regular files can have regular/prealloc extents */
+ if (!S_ISREG(inode->i_mode)) {
+ err = -EUCLEAN;
+ btrfs_crit(root->fs_info,
+ "regular/prealloc extent found for non-regular inode %llu",
+ btrfs_ino(inode));
+ goto out;
+ }
extent_end = extent_start +
btrfs_file_extent_num_bytes(leaf, item);
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
@@ -8294,7 +8335,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
- atomic_inc(&dip->pending_bios);
while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
if (map_length < submit_len + bvec->bv_len ||
@@ -8351,7 +8391,8 @@ submit:
if (!ret)
return 0;
- bio_put(bio);
+ if (bio != orig_bio)
+ bio_put(bio);
out_err:
dip->errors = 1;
/*
@@ -8398,7 +8439,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
io_bio->bi_private = dip;
dip->orig_bio = io_bio;
dip->dio_bio = dio_bio;
- atomic_set(&dip->pending_bios, 0);
+ atomic_set(&dip->pending_bios, 1);
btrfs_bio = btrfs_io_bio(io_bio);
btrfs_bio->logical = file_offset;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 119b1c5c279b..f35e18e76f16 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -59,6 +59,7 @@
#include "props.h"
#include "sysfs.h"
#include "qgroup.h"
+#include "tree-log.h"
#ifdef CONFIG_64BIT
/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
@@ -2016,9 +2017,14 @@ static noinline int copy_to_sk(struct btrfs_root *root,
sh.len = item_len;
sh.transid = found_transid;
- /* copy search result header */
- if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
- ret = -EFAULT;
+ /*
+ * Copy search result header. If we fault then loop again so we
+ * can fault in the pages and -EFAULT there if there's a
+ * problem. Otherwise we'll fault and then copy the buffer in
+ * properly the next time through.
+ */
+ if (probe_user_write(ubuf + *sk_offset, &sh, sizeof(sh))) {
+ ret = 0;
goto out;
}
@@ -2026,10 +2032,14 @@ static noinline int copy_to_sk(struct btrfs_root *root,
if (item_len) {
char __user *up = ubuf + *sk_offset;
- /* copy the item */
- if (read_extent_buffer_to_user(leaf, up,
- item_off, item_len)) {
- ret = -EFAULT;
+ /*
+ * Copy the item, same behavior as above, but reset the
+ * *sk_offset so we copy the full thing again.
+ */
+ if (read_extent_buffer_to_user_nofault(leaf, up,
+ item_off, item_len)) {
+ ret = 0;
+ *sk_offset -= sizeof(sh);
goto out;
}
@@ -2119,6 +2129,11 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = sk->min_offset;
while (1) {
+ ret = fault_in_pages_writeable(ubuf + sk_offset,
+ *buf_size - sk_offset);
+ if (ret)
+ break;
+
ret = btrfs_search_forward(root, &key, path, sk->min_transid);
if (ret != 0) {
if (ret > 0)
@@ -2540,6 +2555,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
+ if (!err)
+ btrfs_record_snapshot_destroy(trans, dir);
ret = btrfs_end_transaction(trans, root);
if (ret && !err)
err = ret;
@@ -3839,6 +3856,8 @@ process_slot:
ret = -EINTR;
goto out;
}
+
+ cond_resched();
}
ret = 0;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 734babb6626c..bc4cc417e7ab 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -462,6 +462,7 @@ next2:
break;
}
out:
+ btrfs_free_path(path);
fs_info->qgroup_flags |= flags;
if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
fs_info->quota_enabled = 0;
@@ -470,7 +471,6 @@ out:
ret >= 0) {
ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
}
- btrfs_free_path(path);
if (ret < 0) {
ulist_free(fs_info->qgroup_ulist);
@@ -2288,8 +2288,10 @@ out:
}
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
- if (done && !ret)
+ if (done && !ret) {
ret = 1;
+ fs_info->qgroup_rescan_progress.objectid = (u64)-1;
+ }
return ret;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index df04309390bb..628b6a046093 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1328,9 +1328,7 @@ static void __del_reloc_root(struct btrfs_root *root)
RB_CLEAR_NODE(&node->rb_node);
}
spin_unlock(&rc->reloc_root_tree.lock);
- if (!node)
- return;
- BUG_ON((struct btrfs_root *)node->data != root);
+ ASSERT(!node || (struct btrfs_root *)node->data == root);
}
spin_lock(&root->fs_info->trans_lock);
@@ -1787,8 +1785,8 @@ int replace_path(struct btrfs_trans_handle *trans,
int ret;
int slot;
- BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
- BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
+ ASSERT(src->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
+ ASSERT(dest->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
last_snapshot = btrfs_root_last_snapshot(&src->root_item);
again:
@@ -1820,7 +1818,7 @@ again:
parent = eb;
while (1) {
level = btrfs_header_level(parent);
- BUG_ON(level < lowest_level);
+ ASSERT(level >= lowest_level);
ret = btrfs_bin_search(parent, &key, level, &slot);
if (ret && slot > 0)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index cc9ccc42f469..0b41a88ef9e9 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -918,11 +918,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
have_csum = sblock_to_check->pagev[0]->have_csum;
dev = sblock_to_check->pagev[0]->dev;
- if (sctx->is_dev_replace && !is_metadata && !have_csum) {
- sblocks_for_recheck = NULL;
- goto nodatasum_case;
- }
-
/*
* read all mirrors one after the other. This includes to
* re-read the extent or metadata block that failed (that was
@@ -1035,13 +1030,19 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
goto out;
}
- if (!is_metadata && !have_csum) {
+ /*
+ * NOTE: Even in the nodatasum case, it's still possible that it's a
+ * compressed data extent, thus scrub_fixup_nodatasum(), which writes
+ * inode page cache onto disk, could cause serious data corruption.
+ *
+ * So here we can only read from disk, and hope our recovery reaches
+ * the disk before the newer write.
+ */
+ if (0 && !is_metadata && !have_csum) {
struct scrub_fixup_nodatasum *fixup_nodatasum;
WARN_ON(sctx->is_dev_replace);
-nodatasum_case:
-
/*
* !is_metadata and !have_csum, this means that the data
* might not be COW'ed, that it might be modified
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f35884a431c1..de0ebb3b3cd3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -34,6 +34,7 @@
#include "disk-io.h"
#include "btrfs_inode.h"
#include "transaction.h"
+#include "xattr.h"
static int g_verbose = 0;
@@ -4194,6 +4195,10 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key,
struct fs_path *p;
posix_acl_xattr_header dummy_acl;
+ /* Capabilities are emitted by finish_inode_if_needed */
+ if (!strncmp(name, XATTR_NAME_CAPS, name_len))
+ return 0;
+
p = fs_path_alloc();
if (!p)
return -ENOMEM;
@@ -4733,6 +4738,64 @@ static int send_extent_data(struct send_ctx *sctx,
return 0;
}
+/*
+ * Search for a capability xattr related to sctx->cur_ino. If the capability is
+ * found, call send_set_xattr function to emit it.
+ *
+ * Return 0 if there isn't a capability, or when the capability was emitted
+ * successfully, or < 0 if an error occurred.
+ */
+static int send_capabilities(struct send_ctx *sctx)
+{
+ struct fs_path *fspath = NULL;
+ struct btrfs_path *path;
+ struct btrfs_dir_item *di;
+ struct extent_buffer *leaf;
+ unsigned long data_ptr;
+ char *buf = NULL;
+ int buf_len;
+ int ret = 0;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ di = btrfs_lookup_xattr(NULL, sctx->send_root, path, sctx->cur_ino,
+ XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0);
+ if (!di) {
+ /* There is no xattr for this inode */
+ goto out;
+ } else if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ buf_len = btrfs_dir_data_len(leaf, di);
+
+ fspath = fs_path_alloc();
+ buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!fspath || !buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
+ if (ret < 0)
+ goto out;
+
+ data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
+ read_extent_buffer(leaf, buf, data_ptr, buf_len);
+
+ ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS,
+ strlen(XATTR_NAME_CAPS), buf, buf_len);
+out:
+ kfree(buf);
+ fs_path_free(fspath);
+ btrfs_free_path(path);
+ return ret;
+}
+
static int clone_range(struct send_ctx *sctx,
struct clone_root *clone_root,
const u64 disk_byte,
@@ -5444,6 +5507,10 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
goto out;
}
+ ret = send_capabilities(sctx);
+ if (ret < 0)
+ goto out;
+
/*
* If other directory inodes depended on our current directory
* inode's move/rename, now do their move/rename operations.
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 404051bf5cba..77e6ce0e1e35 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -843,8 +843,8 @@ out:
return error;
}
-static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
- u64 subvol_objectid)
+char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+ u64 subvol_objectid)
{
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_root *fs_root;
@@ -1120,6 +1120,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
struct btrfs_root *root = info->tree_root;
char *compress_type;
+ const char *subvol_name;
if (btrfs_test_opt(root, DEGRADED))
seq_puts(seq, ",degraded");
@@ -1204,8 +1205,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
#endif
seq_printf(seq, ",subvolid=%llu",
BTRFS_I(d_inode(dentry))->root->root_key.objectid);
- seq_puts(seq, ",subvol=");
- seq_dentry(seq, dentry, " \t\n\\");
+ subvol_name = btrfs_get_subvol_name_from_objectid(info,
+ BTRFS_I(d_inode(dentry))->root->root_key.objectid);
+ if (!IS_ERR(subvol_name)) {
+ seq_puts(seq, ",subvol=");
+ seq_escape(seq, subvol_name, " \t\n\\");
+ kfree(subvol_name);
+ }
return 0;
}
@@ -1323,8 +1329,8 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
goto out;
}
}
- subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
- subvol_objectid);
+ subvol_name = btrfs_get_subvol_name_from_objectid(
+ btrfs_sb(mnt->mnt_sb), subvol_objectid);
if (IS_ERR(subvol_name)) {
root = ERR_CAST(subvol_name);
subvol_name = NULL;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 69255148f0c8..2825cbe3ea8d 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -48,7 +48,13 @@ static struct file_system_type test_type = {
struct inode *btrfs_new_test_inode(void)
{
- return new_inode(test_mnt->mnt_sb);
+ struct inode *inode;
+
+ inode = new_inode(test_mnt->mnt_sb);
+ if (inode)
+ inode_init_owner(inode, NULL, S_IFREG);
+
+ return inode;
}
int btrfs_init_test_fs(void)
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 054fc0d97131..5ff676df698f 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -235,6 +235,7 @@ static noinline int test_btrfs_get_extent(void)
return ret;
}
+ inode->i_mode = S_IFREG;
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 64e449eb2ecd..f0675b7c95ec 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1264,8 +1264,10 @@ int btrfs_defrag_root(struct btrfs_root *root)
while (1) {
trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
ret = btrfs_defrag_leaves(trans, root);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 4320f346b0b9..ff4df1783219 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1511,6 +1511,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
break;
if (ret == 1) {
+ ret = 0;
if (path->slots[0] == 0)
break;
path->slots[0]--;
@@ -1523,17 +1524,19 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
ret = btrfs_del_item(trans, root, path);
if (ret)
- goto out;
+ break;
btrfs_release_path(path);
inode = read_one_inode(root, key.offset);
- if (!inode)
- return -EIO;
+ if (!inode) {
+ ret = -EIO;
+ break;
+ }
ret = fixup_inode_link_count(trans, root, inode);
iput(inode);
if (ret)
- goto out;
+ break;
/*
* fixup on a directory may create new entries,
@@ -1542,8 +1545,6 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
*/
key.offset = (u64)-1;
}
- ret = 0;
-out:
btrfs_release_path(path);
return ret;
}
@@ -1582,8 +1583,6 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, inode);
} else if (ret == -EEXIST) {
ret = 0;
- } else {
- BUG(); /* Logic Error */
}
iput(inode);
@@ -3169,11 +3168,13 @@ fail:
btrfs_free_path(path);
out_unlock:
mutex_unlock(&BTRFS_I(dir)->log_mutex);
- if (ret == -ENOSPC) {
+ if (err == -ENOSPC) {
btrfs_set_log_full_commit(root->fs_info, trans);
- ret = 0;
- } else if (ret < 0)
- btrfs_abort_transaction(trans, root, ret);
+ err = 0;
+ } else if (err < 0 && err != -ENOENT) {
+ /* ENOENT can be returned if the entry hasn't been fsynced yet */
+ btrfs_abort_transaction(trans, root, err);
+ }
btrfs_end_log_trans(root);
@@ -3333,6 +3334,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
* search and this search we'll not find the key again and can just
* bail.
*/
+search:
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
if (ret != 0)
goto done;
@@ -3352,6 +3354,13 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
if (min_key.objectid != ino || min_key.type != key_type)
goto done;
+
+ if (need_resched()) {
+ btrfs_release_path(path);
+ cond_resched();
+ goto search;
+ }
+
ret = overwrite_item(trans, log, dst_path, src, i,
&min_key);
if (ret) {
@@ -3733,11 +3742,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
log->fs_info->csum_root,
ds + cs, ds + cs + cl - 1,
&ordered_sums, 0);
- if (ret) {
- btrfs_release_path(dst_path);
- kfree(ins_data);
- return ret;
- }
+ if (ret)
+ break;
}
}
}
@@ -3750,7 +3756,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
* we have to do this after the loop above to avoid changing the
* log tree while trying to change the log tree.
*/
- ret = 0;
while (!list_empty(&ordered_sums)) {
struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
struct btrfs_ordered_sum,
@@ -5695,6 +5700,21 @@ record:
}
/*
+ * Make sure that if someone attempts to fsync the parent directory of a deleted
+ * snapshot, it ends up triggering a transaction commit. This is to guarantee
+ * that after replaying the log tree of the parent directory's root we will not
+ * see the snapshot anymore and at log replay time we will not see any log tree
+ * corresponding to the deleted snapshot's root, which could lead to replaying
+ * it after replaying the log tree of the parent directory (which would replay
+ * the snapshot delete operation).
+ */
+void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
+ struct inode *dir)
+{
+ BTRFS_I(dir)->last_unlink_trans = trans->transid;
+}
+
+/*
* Call this after adding a new name for a file and it will properly
* update the log to reflect the new name.
*
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 6916a781ea02..a9f1b75d080d 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -79,6 +79,8 @@ int btrfs_pin_log_trans(struct btrfs_root *root);
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
struct inode *dir, struct inode *inode,
int for_rename);
+void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
+ struct inode *dir);
int btrfs_log_new_name(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *old_dir,
struct dentry *parent);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 55ce6543050d..d6383d362e27 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2357,9 +2357,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
btrfs_set_super_num_devices(root->fs_info->super_copy,
tmp + 1);
- /* add sysfs device entry */
- btrfs_sysfs_add_device_link(root->fs_info->fs_devices, device);
-
/*
* we've got more storage, clear any full flags on the space
* infos
@@ -2367,6 +2364,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
btrfs_clear_space_info_full(root->fs_info);
unlock_chunks(root);
+
+ /* add sysfs device entry */
+ btrfs_sysfs_add_device_link(root->fs_info->fs_devices, device);
+
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (seeding_dev) {
@@ -4065,6 +4066,7 @@ static int btrfs_uuid_scan_kthread(void *data)
goto skip;
}
update_tree:
+ btrfs_release_path(path);
if (!btrfs_is_empty_uuid(root_item.uuid)) {
ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
root_item.uuid,
@@ -4090,6 +4092,7 @@ update_tree:
}
skip:
+ btrfs_release_path(path);
if (trans) {
ret = btrfs_end_transaction(trans, fs_info->uuid_root);
trans = NULL;
@@ -4097,7 +4100,6 @@ skip:
break;
}
- btrfs_release_path(path);
if (key.offset < (u64)-1) {
key.offset++;
} else if (key.type < BTRFS_ROOT_ITEM_KEY) {
@@ -6261,6 +6263,13 @@ static int btrfs_check_chunk_valid(struct btrfs_root *root,
return -EIO;
}
+ if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
+ (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) {
+ btrfs_err(root->fs_info,
+ "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
+ type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+ return -EUCLEAN;
+ }
if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
btrfs_err(root->fs_info, "missing chunk type flag: 0x%llx", type);
return -EIO;
@@ -6694,6 +6703,14 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
lock_chunks(root);
/*
+ * It is possible for mount and umount to race in such a way that
+ * we execute this code path, but open_fs_devices failed to clear
+ * total_rw_bytes. We certainly want it cleared before reading the
+ * device items, so clear it here.
+ */
+ root->fs_info->fs_devices->total_rw_bytes = 0;
+
+ /*
* Read all device items, and then all the chunk items. All
* device items are found before any chunk item (their object id
* is smaller than the lowest possible object id for a chunk
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c05ab2ec0fef..9047f0e64bc0 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -64,9 +64,9 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
object = container_of(op->op.object, struct cachefiles_object, fscache);
spin_lock(&object->work_lock);
list_add_tail(&monitor->op_link, &op->to_do);
+ fscache_enqueue_retrieval(op);
spin_unlock(&object->work_lock);
- fscache_enqueue_retrieval(op);
fscache_put_retrieval(op);
return 0;
}
@@ -125,7 +125,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,
_debug("reissue read");
ret = bmapping->a_ops->readpage(NULL, backpage);
if (ret < 0)
- goto unlock_discard;
+ goto discard;
}
/* but the page may have been read before the monitor was installed, so
@@ -142,6 +142,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,
unlock_discard:
unlock_page(backpage);
+discard:
spin_lock_irq(&object->work_lock);
list_del(&monitor->op_link);
spin_unlock_irq(&object->work_lock);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 22bae2b434e2..26de74684c17 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -72,10 +72,6 @@ static int ceph_set_page_dirty(struct page *page)
struct inode *inode;
struct ceph_inode_info *ci;
struct ceph_snap_context *snapc;
- int ret;
-
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
if (PageDirty(page)) {
dout("%p set_page_dirty %p idx %lu -- already dirty\n",
@@ -121,11 +117,7 @@ static int ceph_set_page_dirty(struct page *page)
page->private = (unsigned long)snapc;
SetPagePrivate(page);
- ret = __set_page_dirty_nobuffers(page);
- WARN_ON(!PageLocked(page));
- WARN_ON(!page->mapping);
-
- return ret;
+ return __set_page_dirty_nobuffers(page);
}
/*
@@ -1243,7 +1235,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
struct page *pinned_page = NULL;
- loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
+ loff_t off = (loff_t)vmf->pgoff << PAGE_CACHE_SHIFT;
int want, got, ret;
dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index aa4df4a02252..154c47282a34 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -920,12 +920,19 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
- struct ceph_mds_client *mdsc =
- ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+ struct ceph_mds_client *mdsc;
int removed = 0;
+ /* 'ci' being NULL means the removal has already occurred */
+ if (!ci) {
+ dout("%s: cap inode is NULL\n", __func__);
+ return;
+ }
+
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
+ mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
+
/* remove from inode's cap rbtree, and clear auth cap */
rb_erase(&cap->ci_node, &ci->i_caps);
if (ci->i_auth_cap == cap)
@@ -1538,6 +1545,8 @@ static int __mark_caps_flushing(struct inode *inode,
* try to invalidate mapping pages without blocking.
*/
static int try_nonblocking_invalidate(struct inode *inode)
+ __releases(ci->i_ceph_lock)
+ __acquires(ci->i_ceph_lock)
{
struct ceph_inode_info *ci = ceph_inode(inode);
u32 invalidating_gen = ci->i_rdcache_gen;
@@ -1734,8 +1743,12 @@ retry_locked:
}
/* want more caps from mds? */
- if (want & ~(cap->mds_wanted | cap->issued))
- goto ack;
+ if (want & ~cap->mds_wanted) {
+ if (want & ~(cap->mds_wanted | cap->issued))
+ goto ack;
+ if (!__cap_is_valid(cap))
+ goto ack;
+ }
/* things we might delay */
if ((cap->issued & ~retain) == 0 &&
@@ -1773,12 +1786,24 @@ ack:
if (mutex_trylock(&session->s_mutex) == 0) {
dout("inverting session/ino locks on %p\n",
session);
+ session = ceph_get_mds_session(session);
spin_unlock(&ci->i_ceph_lock);
if (took_snap_rwsem) {
up_read(&mdsc->snap_rwsem);
took_snap_rwsem = 0;
}
- mutex_lock(&session->s_mutex);
+ if (session) {
+ mutex_lock(&session->s_mutex);
+ ceph_put_mds_session(session);
+ } else {
+ /*
+ * Because we take the reference while
+ * holding the i_ceph_lock, it should
+ * never be NULL. Throw a warning if it
+ * ever is.
+ */
+ WARN_ON_ONCE(true);
+ }
goto retry;
}
}
@@ -3289,6 +3314,7 @@ retry:
WARN_ON(1);
tsession = NULL;
target = -1;
+ mutex_lock(&session->s_mutex);
}
goto retry;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index fe02ae7f056a..ff9e60daf086 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -146,6 +146,11 @@ static struct dentry *__get_parent(struct super_block *sb,
}
req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ if (err) {
+ ceph_mdsc_put_request(req);
+ return ERR_PTR(err);
+ }
+
inode = req->r_target_inode;
if (inode)
ihold(inode);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index c8222bfe1e56..3e6ebe40f06f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1433,6 +1433,7 @@ const struct file_operations ceph_file_fops = {
.mmap = ceph_mmap,
.fsync = ceph_fsync,
.lock = ceph_lock,
+ .setlease = simple_nosetlease,
.flock = ceph_flock,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a5de8e22629b..b7fd7d69be07 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3428,6 +3428,9 @@ static void delayed_work(struct work_struct *work)
dout("mdsc delayed_work\n");
ceph_check_delayed_caps(mdsc);
+ if (mdsc->stopping)
+ return;
+
mutex_lock(&mdsc->mutex);
renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
renew_caps = time_after_eq(jiffies, HZ*renew_interval +
@@ -3752,7 +3755,16 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
{
dout("stop\n");
- cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
+ /*
+ * Make sure the delayed work has stopped before releasing
+ * the resources.
+ *
+ * cancel_delayed_work_sync() only guarantees that the
+ * work finishes executing, but the delayed work will
+ * re-arm itself again after that.
+ */
+ flush_delayed_work(&mdsc->delayed_work);
+
if (mdsc->mdsmap)
ceph_mdsmap_destroy(mdsc->mdsmap);
kfree(mdsc->sessions);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index f1f3bb812799..9154a2d7b195 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -472,6 +472,85 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
return 0;
}
+/**
+ * cdev_set_parent() - set the parent kobject for a char device
+ * @p: the cdev structure
+ * @kobj: the kobject to take a reference to
+ *
+ * cdev_set_parent() sets a parent kobject which will be referenced
+ * appropriately so the parent is not freed before the cdev. This
+ * should be called before cdev_add.
+ */
+void cdev_set_parent(struct cdev *p, struct kobject *kobj)
+{
+ WARN_ON(!kobj->state_initialized);
+ p->kobj.parent = kobj;
+}
+
+/**
+ * cdev_device_add() - add a char device and its corresponding
+ * struct device, linking their lifetimes
+ * @dev: the device structure
+ * @cdev: the cdev structure
+ *
+ * cdev_device_add() adds the char device represented by @cdev to the system,
+ * just as cdev_add does. It then adds @dev to the system using device_add
+ * The dev_t for the char device will be taken from the struct device which
+ * needs to be initialized first. This helper function correctly takes a
+ * reference to the parent device so the parent will not get released until
+ * all references to the cdev are released.
+ *
+ * This helper uses dev->devt for the device number. If it is not set
+ * it will not add the cdev and it will be equivalent to device_add.
+ *
+ * This function should be used whenever the struct cdev and the
+ * struct device are members of the same structure whose lifetime is
+ * managed by the struct device.
+ *
+ * NOTE: Callers must assume that userspace was able to open the cdev and
+ * can call cdev fops callbacks at any time, even if this function fails.
+ */
+int cdev_device_add(struct cdev *cdev, struct device *dev)
+{
+ int rc = 0;
+
+ if (dev->devt) {
+ cdev_set_parent(cdev, &dev->kobj);
+
+ rc = cdev_add(cdev, dev->devt, 1);
+ if (rc)
+ return rc;
+ }
+
+ rc = device_add(dev);
+ if (rc)
+ cdev_del(cdev);
+
+ return rc;
+}
+
+/**
+ * cdev_device_del() - inverse of cdev_device_add
+ * @dev: the device structure
+ * @cdev: the cdev structure
+ *
+ * cdev_device_del() is a helper function to call cdev_del and device_del.
+ * It should be used whenever cdev_device_add is used.
+ *
+ * If dev->devt is not set it will not remove the cdev and will be equivalent
+ * to device_del.
+ *
+ * NOTE: This guarantees that associated sysfs callbacks are not running
+ * or runnable, however any cdevs already open will remain and their fops
+ * will still be callable even after this function returns.
+ */
+void cdev_device_del(struct cdev *cdev, struct device *dev)
+{
+ device_del(dev);
+ if (dev->devt)
+ cdev_del(cdev);
+}
+
static void cdev_unmap(dev_t dev, unsigned count)
{
kobj_unmap(cdev_map, dev, count);
@@ -483,6 +562,10 @@ static void cdev_unmap(dev_t dev, unsigned count)
*
* cdev_del() removes @p from the system, possibly freeing the structure
* itself.
+ *
+ * NOTE: This guarantees that cdev device will no longer be able to be
+ * opened, however any cdevs already open will remain and their fops will
+ * still be callable even after cdev_del returns.
*/
void cdev_del(struct cdev *p)
{
@@ -571,5 +654,8 @@ EXPORT_SYMBOL(cdev_init);
EXPORT_SYMBOL(cdev_alloc);
EXPORT_SYMBOL(cdev_del);
EXPORT_SYMBOL(cdev_add);
+EXPORT_SYMBOL(cdev_set_parent);
+EXPORT_SYMBOL(cdev_device_add);
+EXPORT_SYMBOL(cdev_device_del);
EXPORT_SYMBOL(__register_chrdev);
EXPORT_SYMBOL(__unregister_chrdev);
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index a3b56544c21b..ae1f2817bd6a 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -541,8 +541,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
|| (tag != ASN1_EOC)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 0\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 0\n",
+ cls, con, tag, end);
return 0;
}
@@ -552,8 +552,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
|| (tag != ASN1_SEQ)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 1\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 1\n",
+ cls, con, tag, end);
return 0;
}
@@ -563,8 +563,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
|| (tag != ASN1_EOC)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 0\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 0\n",
+ cls, con, tag, end);
return 0;
}
@@ -575,8 +575,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
|| (tag != ASN1_SEQ)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 1\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d sequence_end = %p exit 1\n",
+ cls, con, tag, sequence_end);
return 0;
}
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 211ac472cb9d..e5e780145728 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -367,14 +367,9 @@ cifs_strndup_from_utf16(const char *src, const int maxlen,
if (!dst)
return NULL;
cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
- NO_MAP_UNI_RSVD);
+ NO_MAP_UNI_RSVD);
} else {
- len = strnlen(src, maxlen);
- len++;
- dst = kmalloc(len, GFP_KERNEL);
- if (!dst)
- return NULL;
- strlcpy(dst, src, len);
+ dst = kstrndup(src, maxlen, GFP_KERNEL);
}
return dst;
@@ -493,7 +488,13 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
else if (map_chars == SFM_MAP_UNI_RSVD) {
bool end_of_string;
- if (i == srclen - 1)
+ /**
+ * Remap spaces and periods found at the end of every
+ * component of the path. The special cases of '.' and
+ * '..' do not need to be dealt with explicitly because
+ * they are addressed in namei.c:link_path_walk().
+ **/
+ if ((i == srclen - 1) || (source[i+1] == '\\'))
end_of_string = true;
else
end_of_string = false;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 4f4fc9ff3636..5666eb49bbbd 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -204,7 +204,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
rc = server->ops->queryfs(xid, tcon, buf);
free_xid(xid);
- return 0;
+ return rc;
}
static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b9b8f19dce0e..fa07f7cb85a5 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -184,6 +184,18 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
* reconnect the same SMB session
*/
mutex_lock(&ses->session_mutex);
+
+ /*
+ * Recheck after acquire mutex. If another thread is negotiating
+ * and the server never sends an answer the socket will be closed
+ * and tcpStatus set to reconnect.
+ */
+ if (server->tcpStatus == CifsNeedReconnect) {
+ rc = -EHOSTDOWN;
+ mutex_unlock(&ses->session_mutex);
+ goto out;
+ }
+
rc = cifs_negotiate_protocol(0, ses);
if (rc == 0 && ses->need_reconnect)
rc = cifs_setup_session(0, ses, nls_codepage);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index cf104bbe30a1..cda22b312a4c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -338,8 +338,10 @@ static int reconn_set_ipaddr(struct TCP_Server_Info *server)
return rc;
}
+ spin_lock(&cifs_tcp_ses_lock);
rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr,
strlen(ipaddr));
+ spin_unlock(&cifs_tcp_ses_lock);
kfree(ipaddr);
return !rc ? -1 : 0;
@@ -546,20 +548,21 @@ static bool
server_unresponsive(struct TCP_Server_Info *server)
{
/*
- * We need to wait 2 echo intervals to make sure we handle such
+ * We need to wait 3 echo intervals to make sure we handle such
* situations right:
* 1s client sends a normal SMB request
- * 2s client gets a response
+ * 3s client gets a response
* 30s echo workqueue job pops, and decides we got a response recently
* and don't need to send another
* ...
* 65s kernel_recvmsg times out, and we see that we haven't gotten
* a response in >60s.
*/
- if (server->tcpStatus == CifsGood &&
- time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
+ if ((server->tcpStatus == CifsGood ||
+ server->tcpStatus == CifsNeedNegotiate) &&
+ time_after(jiffies, server->lstrp + 3 * SMB_ECHO_INTERVAL)) {
cifs_dbg(VFS, "Server %s has not responded in %d seconds. Reconnecting...\n",
- server->hostname, (2 * SMB_ECHO_INTERVAL) / HZ);
+ server->hostname, (3 * SMB_ECHO_INTERVAL) / HZ);
cifs_reconnect(server);
wake_up(&server->response_q);
return true;
@@ -780,6 +783,8 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
list_del_init(&server->tcp_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
+ cancel_delayed_work_sync(&server->echo);
+
spin_lock(&GlobalMid_Lock);
server->tcpStatus = CifsExiting;
spin_unlock(&GlobalMid_Lock);
@@ -2956,9 +2961,10 @@ cifs_match_super(struct super_block *sb, void *data)
spin_lock(&cifs_tcp_ses_lock);
cifs_sb = CIFS_SB(sb);
tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
- if (IS_ERR(tlink)) {
+ if (tlink == NULL) {
+ /* can not match superblock if tlink were ever null */
spin_unlock(&cifs_tcp_ses_lock);
- return rc;
+ return 0;
}
tcon = tlink_tcon(tlink);
ses = tcon->ses;
@@ -4200,9 +4206,12 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
vol_info->retry = master_tcon->retry;
vol_info->nocase = master_tcon->nocase;
vol_info->local_lease = master_tcon->local_lease;
+ vol_info->resilient = master_tcon->use_resilient;
+ vol_info->persistent = master_tcon->use_persistent;
vol_info->no_linux_ext = !master_tcon->unix_ext;
vol_info->sectype = master_tcon->ses->sectype;
vol_info->sign = master_tcon->ses->sign;
+ vol_info->seal = master_tcon->seal;
rc = cifs_set_vol_auth(vol_info, master_tcon->ses);
if (rc) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index be16da31cbcc..9f1641324a81 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -831,6 +831,7 @@ static int
cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
{
struct inode *inode;
+ int rc;
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -840,8 +841,25 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
if ((flags & LOOKUP_REVAL) && !CIFS_CACHE_READ(CIFS_I(inode)))
CIFS_I(inode)->time = 0; /* force reval */
- if (cifs_revalidate_dentry(direntry))
- return 0;
+ rc = cifs_revalidate_dentry(direntry);
+ if (rc) {
+ cifs_dbg(FYI, "cifs_revalidate_dentry failed with rc=%d", rc);
+ switch (rc) {
+ case -ENOENT:
+ case -ESTALE:
+ /*
+ * Those errors mean the dentry is invalid
+ * (file was deleted or recreated)
+ */
+ return 0;
+ default:
+ /*
+ * Otherwise some unexpected error happened
+ * report it as-is to VFS layer
+ */
+ return rc;
+ }
+ }
else {
/*
* If the inode wasn't known to be a dfs entry when
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 2ffdaedca7e9..5bc617cb7721 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -163,6 +163,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
goto posix_open_ret;
}
} else {
+ cifs_revalidate_mapping(*pinode);
cifs_fattr_to_inode(*pinode, &fattr);
}
@@ -3230,7 +3231,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
* than it negotiated since it will refuse the read
* then.
*/
- if ((tcon->ses) && !(tcon->ses->capabilities &
+ if (!(tcon->ses->capabilities &
tcon->ses->server->vals->cap_large_files)) {
current_read_size = min_t(uint,
current_read_size, CIFSMaxBufSize);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 9bc7a29f88d6..2d3918cdcc28 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -602,7 +602,7 @@ sess_alloc_buffer(struct sess_data *sess_data, int wct)
return 0;
out_free_smb_buf:
- kfree(smb_buf);
+ cifs_small_buf_release(smb_buf);
sess_data->iov[0].iov_base = NULL;
sess_data->iov[0].iov_len = 0;
sess_data->buf0_type = CIFS_NO_BUFFER;
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 44198b9a5315..19baeb4ca511 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -633,8 +633,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
}
}
spin_unlock(&cifs_tcp_ses_lock);
- cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n");
- return false;
+ cifs_dbg(FYI, "No file id matched, oplock break ignored\n");
+ return true;
}
void
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 0fcf42401a5d..c173d047b44b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -619,6 +619,8 @@ smb2_clone_range(const unsigned int xid,
cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
/* Request server copy to target from src identified by key */
+ kfree(retbuf);
+ retbuf = NULL;
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
true /* is_fsctl */, (char *)pcchunk,
@@ -1145,6 +1147,12 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
inode = d_inode(cfile->dentry);
cifsi = CIFS_I(inode);
+ /*
+ * We zero the range through ioctl, so we need remove the page caches
+ * first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
/* if file not oplocked can't be sure whether asking to extend size */
if (!CIFS_CACHE_READ(cifsi))
if (keep_size == false)
@@ -1201,6 +1209,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse))
return -EOPNOTSUPP;
+ /*
+ * We implement the punch hole through ioctl, so we need remove the page
+ * caches first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
cifs_dbg(FYI, "offset %lld len %lld", offset, len);
fsctl_buf.FileOffset = cpu_to_le64(offset);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index d4472a494758..4ffd5e177288 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -249,6 +249,18 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
* the same SMB session
*/
mutex_lock(&tcon->ses->session_mutex);
+
+ /*
+ * Recheck after acquire mutex. If another thread is negotiating
+ * and the server never sends an answer the socket will be closed
+ * and tcpStatus set to reconnect.
+ */
+ if (server->tcpStatus == CifsNeedReconnect) {
+ rc = -EHOSTDOWN;
+ mutex_unlock(&tcon->ses->session_mutex);
+ goto out;
+ }
+
rc = cifs_negotiate_protocol(0, tcon->ses);
if (!rc && tcon->ses->need_reconnect) {
rc = cifs_setup_session(0, tcon->ses, nls_codepage);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 44f49d86d714..49c06f3cd952 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -780,6 +780,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
struct buffer_head *map_bh)
{
int ret = 0;
+ int boundary = sdio->boundary; /* dio_send_cur_page may clear it */
if (dio->rw & WRITE) {
/*
@@ -818,10 +819,10 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
sdio->cur_page_fs_offset = sdio->block_in_file << sdio->blkbits;
out:
/*
- * If sdio->boundary then we want to schedule the IO now to
+ * If boundary then we want to schedule the IO now to
* avoid metadata seeks.
*/
- if (sdio->boundary) {
+ if (boundary) {
ret = dio_send_cur_page(dio, sdio, map_bh);
if (sdio->bio)
dio_bio_submit(dio, sdio);
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index eea64912c9c0..3b79c0284a30 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -545,6 +545,7 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos)
if (bucket >= ls->ls_rsbtbl_size) {
kfree(ri);
+ ++*pos;
return NULL;
}
tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 5eff6ea3e27f..63e856d90ed0 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -92,7 +92,6 @@ do { \
__LINE__, __FILE__, #x, jiffies); \
{do} \
printk("\n"); \
- BUG(); \
panic("DLM: Record message above and reboot.\n"); \
} \
}
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index b14bb2c46042..499f54f99891 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -626,6 +626,9 @@ static int new_lockspace(const char *name, const char *cluster,
wait_event(ls->ls_recover_lock_wait,
test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
+ /* let kobject handle freeing of ls if there's an error */
+ do_unreg = 1;
+
ls->ls_kobj.kset = dlm_kset;
error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
"%s", ls->ls_name);
@@ -633,9 +636,6 @@ static int new_lockspace(const char *name, const char *cluster,
goto out_recoverd;
kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
- /* let kobject handle freeing of ls if there's an error */
- do_unreg = 1;
-
/* This uevent triggers dlm_controld in userspace to add us to the
group of nodes that are members of this lockspace (managed by the
cluster infrastructure.) Once it's done that, it tells us who the
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 9d7a4a714907..99f4cd91910f 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -554,7 +554,7 @@ static void close_connection(struct connection *con, bool and_other,
}
if (con->othercon && and_other) {
/* Will only re-enter once. */
- close_connection(con->othercon, false, true, true);
+ close_connection(con->othercon, false, tx, rx);
}
if (con->rx_page) {
__free_page(con->rx_page);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 83e9f6272bfb..f246f1760ba2 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -346,10 +346,8 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
struct extent_crypt_result ecr;
int rc = 0;
- if (!crypt_stat || !crypt_stat->tfm
- || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
- return -EINVAL;
-
+ BUG_ON(!crypt_stat || !crypt_stat->tfm
+ || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
crypt_stat->key_size);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 4f4d0474bee9..cd2a3199a814 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -507,6 +507,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
goto out;
}
+ if (!dev_name) {
+ rc = -EINVAL;
+ err = "Device name cannot be null";
+ goto out;
+ }
+
rc = ecryptfs_parse_options(sbi, raw_data, &check_ruid);
if (rc) {
err = "Error parsing options";
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index e2ab6d0497f2..151884b95ee2 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -10,6 +10,7 @@
#include <linux/efi.h>
#include <linux/fs.h>
#include <linux/ctype.h>
+#include <linux/kmemleak.h>
#include <linux/slab.h>
#include "internal.h"
@@ -138,6 +139,7 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
var->var.VariableName[i] = '\0';
inode->i_private = var;
+ kmemleak_ignore(var);
efivar_entry_add(var, &efivarfs_list);
d_instantiate(dentry, inode);
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index abb244b06024..fca235020312 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -147,6 +147,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
name[len + EFI_VARIABLE_GUID_LEN+1] = '\0';
+ /* replace invalid slashes like kobject_set_name_vargs does for /sys/firmware/efi/vars. */
+ strreplace(name, '/', '!');
+
inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0,
is_removable);
if (!inode)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 240d9ceb8d0c..2ef15a4018d0 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -217,8 +217,7 @@ struct eventpoll {
struct file *file;
/* used to optimize loop detection check */
- int visited;
- struct list_head visited_list_link;
+ u64 gen;
};
/* Wait structure used by the poll hooks */
@@ -262,6 +261,8 @@ static long max_user_watches __read_mostly;
*/
static DEFINE_MUTEX(epmutex);
+static u64 loop_check_gen = 0;
+
/* Used to check for epoll file descriptor inclusion loops */
static struct nested_calls poll_loop_ncalls;
@@ -277,9 +278,6 @@ static struct kmem_cache *epi_cache __read_mostly;
/* Slab cache used to allocate "struct eppoll_entry" */
static struct kmem_cache *pwq_cache __read_mostly;
-/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
-static LIST_HEAD(visited_list);
-
/*
* List of files with newly added links, where we may need to limit the number
* of emanating paths. Protected by the epmutex.
@@ -1234,7 +1232,7 @@ static int reverse_path_check(void)
static int ep_create_wakeup_source(struct epitem *epi)
{
- const char *name;
+ struct name_snapshot n;
struct wakeup_source *ws;
if (!epi->ep->ws) {
@@ -1243,8 +1241,9 @@ static int ep_create_wakeup_source(struct epitem *epi)
return -ENOMEM;
}
- name = epi->ffd.file->f_path.dentry->d_name.name;
- ws = wakeup_source_register(name);
+ take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry);
+ ws = wakeup_source_register(n.name);
+ release_dentry_name_snapshot(&n);
if (!ws)
return -ENOMEM;
@@ -1304,6 +1303,22 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
RCU_INIT_POINTER(epi->ws, NULL);
}
+ /* Add the current item to the list of active epoll hook for this file */
+ spin_lock(&tfile->f_lock);
+ list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
+ spin_unlock(&tfile->f_lock);
+
+ /*
+ * Add the current item to the RB tree. All RB tree operations are
+ * protected by "mtx", and ep_insert() is called with "mtx" held.
+ */
+ ep_rbtree_insert(ep, epi);
+
+ /* now check if we've created too many backpaths */
+ error = -EINVAL;
+ if (full_check && reverse_path_check())
+ goto error_remove_epi;
+
/* Initialize the poll table using the queue callback */
epq.epi = epi;
init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
@@ -1326,22 +1341,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
if (epi->nwait < 0)
goto error_unregister;
- /* Add the current item to the list of active epoll hook for this file */
- spin_lock(&tfile->f_lock);
- list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
- spin_unlock(&tfile->f_lock);
-
- /*
- * Add the current item to the RB tree. All RB tree operations are
- * protected by "mtx", and ep_insert() is called with "mtx" held.
- */
- ep_rbtree_insert(ep, epi);
-
- /* now check if we've created too many backpaths */
- error = -EINVAL;
- if (full_check && reverse_path_check())
- goto error_remove_epi;
-
/* We have to drop the new item inside our item list to keep track of it */
spin_lock_irqsave(&ep->lock, flags);
@@ -1367,6 +1366,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
return 0;
+error_unregister:
+ ep_unregister_pollwait(ep, epi);
error_remove_epi:
spin_lock(&tfile->f_lock);
list_del_rcu(&epi->fllink);
@@ -1374,9 +1375,6 @@ error_remove_epi:
rb_erase(&epi->rbn, &ep->rbr);
-error_unregister:
- ep_unregister_pollwait(ep, epi);
-
/*
* We need to do this because an event could have been arrived on some
* allocated wait queue. Note that we don't care about the ep->ovflist
@@ -1697,13 +1695,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
struct epitem *epi;
mutex_lock_nested(&ep->mtx, call_nests + 1);
- ep->visited = 1;
- list_add(&ep->visited_list_link, &visited_list);
+ ep->gen = loop_check_gen;
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
ep_tovisit = epi->ffd.file->private_data;
- if (ep_tovisit->visited)
+ if (ep_tovisit->gen == loop_check_gen)
continue;
error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
ep_loop_check_proc, epi->ffd.file,
@@ -1719,9 +1716,11 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
* not already there, and calling reverse_path_check()
* during ep_insert().
*/
- if (list_empty(&epi->ffd.file->f_tfile_llink))
- list_add(&epi->ffd.file->f_tfile_llink,
- &tfile_check_list);
+ if (list_empty(&epi->ffd.file->f_tfile_llink)) {
+ if (get_file_rcu(epi->ffd.file))
+ list_add(&epi->ffd.file->f_tfile_llink,
+ &tfile_check_list);
+ }
}
}
mutex_unlock(&ep->mtx);
@@ -1742,18 +1741,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
*/
static int ep_loop_check(struct eventpoll *ep, struct file *file)
{
- int ret;
- struct eventpoll *ep_cur, *ep_next;
-
- ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
ep_loop_check_proc, file, ep, current);
- /* clear visited list */
- list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
- visited_list_link) {
- ep_cur->visited = 0;
- list_del(&ep_cur->visited_list_link);
- }
- return ret;
}
static void clear_tfile_check_list(void)
@@ -1765,6 +1754,7 @@ static void clear_tfile_check_list(void)
file = list_first_entry(&tfile_check_list, struct file,
f_tfile_llink);
list_del_init(&file->f_tfile_llink);
+ fput(file);
}
INIT_LIST_HEAD(&tfile_check_list);
}
@@ -1896,19 +1886,20 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_lock_nested(&ep->mtx, 0);
if (op == EPOLL_CTL_ADD) {
if (!list_empty(&f.file->f_ep_links) ||
+ ep->gen == loop_check_gen ||
is_file_epoll(tf.file)) {
full_check = 1;
mutex_unlock(&ep->mtx);
mutex_lock(&epmutex);
if (is_file_epoll(tf.file)) {
error = -ELOOP;
- if (ep_loop_check(ep, tf.file) != 0) {
- clear_tfile_check_list();
+ if (ep_loop_check(ep, tf.file) != 0)
goto error_tgt_fput;
- }
- } else
+ } else {
+ get_file(tf.file);
list_add(&tf.file->f_tfile_llink,
&tfile_check_list);
+ }
mutex_lock_nested(&ep->mtx, 0);
if (is_file_epoll(tf.file)) {
tep = tf.file->private_data;
@@ -1932,8 +1923,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
error = ep_insert(ep, &epds, tf.file, fd, full_check);
} else
error = -EEXIST;
- if (full_check)
- clear_tfile_check_list();
break;
case EPOLL_CTL_DEL:
if (epi)
@@ -1954,8 +1943,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_unlock(&ep->mtx);
error_tgt_fput:
- if (full_check)
+ if (full_check) {
+ clear_tfile_check_list();
+ loop_check_gen++;
mutex_unlock(&epmutex);
+ }
fdput(tf);
error_fput:
diff --git a/fs/exec.c b/fs/exec.c
index d882ab7ac6e8..ce111af5784b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -875,7 +875,7 @@ static int exec_mmap(struct mm_struct *mm)
/* Notify parent that we're no longer interested in the old VM */
tsk = current;
old_mm = current->mm;
- mm_release(tsk, old_mm);
+ exec_mm_release(tsk, old_mm);
if (old_mm) {
sync_mm_rss(old_mm);
@@ -1124,6 +1124,8 @@ int flush_old_exec(struct linux_binprm * bprm)
*/
set_mm_exe_file(bprm->mm, bprm->file);
+ would_dump(bprm, bprm->file);
+
/*
* Release all of the old mmap stuff
*/
@@ -1632,8 +1634,6 @@ static int do_execveat_common(int fd, struct filename *filename,
if (retval < 0)
goto out;
- would_dump(bprm, bprm->file);
-
retval = exec_binprm(bprm);
if (retval < 0)
goto out;
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 9f9992b37924..2e4747e0aaf0 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -46,10 +46,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
struct ext2_sb_info *sbi = EXT2_SB(sb);
if (block_group >= sbi->s_groups_count) {
- ext2_error (sb, "ext2_get_group_desc",
- "block_group >= groups_count - "
- "block_group = %d, groups_count = %lu",
- block_group, sbi->s_groups_count);
+ WARN(1, "block_group >= groups_count - "
+ "block_group = %d, groups_count = %lu",
+ block_group, sbi->s_groups_count);
return NULL;
}
@@ -57,10 +56,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
if (!sbi->s_group_desc[group_desc]) {
- ext2_error (sb, "ext2_get_group_desc",
- "Group descriptor not loaded - "
- "block_group = %d, group_desc = %lu, desc = %lu",
- block_group, group_desc, offset);
+ WARN(1, "Group descriptor not loaded - "
+ "block_group = %d, group_desc = %lu, desc = %lu",
+ block_group, group_desc, offset);
return NULL;
}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index efe5fb21c533..d9ef354b821a 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -79,6 +79,7 @@ static void ext2_release_inode(struct super_block *sb, int group, int dir)
if (dir)
le16_add_cpu(&desc->bg_used_dirs_count, -1);
spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
+ percpu_counter_inc(&EXT2_SB(sb)->s_freeinodes_counter);
if (dir)
percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
mark_buffer_dirty(bh);
@@ -525,7 +526,7 @@ got:
goto fail;
}
- percpu_counter_add(&sbi->s_freeinodes_counter, -1);
+ percpu_counter_dec(&sbi->s_freeinodes_counter);
if (S_ISDIR(mode))
percpu_counter_inc(&sbi->s_dirs_counter);
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 02ddec6d8a7d..176a8382e372 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -23,6 +23,7 @@ struct ext4_system_zone {
struct rb_node node;
ext4_fsblk_t start_blk;
unsigned int count;
+ u32 ino;
};
static struct kmem_cache *ext4_system_zone_cachep;
@@ -43,7 +44,8 @@ void ext4_exit_system_zone(void)
static inline int can_merge(struct ext4_system_zone *entry1,
struct ext4_system_zone *entry2)
{
- if ((entry1->start_blk + entry1->count) == entry2->start_blk)
+ if ((entry1->start_blk + entry1->count) == entry2->start_blk &&
+ entry1->ino == entry2->ino)
return 1;
return 0;
}
@@ -55,9 +57,9 @@ static inline int can_merge(struct ext4_system_zone *entry1,
*/
static int add_system_zone(struct ext4_sb_info *sbi,
ext4_fsblk_t start_blk,
- unsigned int count)
+ unsigned int count, u32 ino)
{
- struct ext4_system_zone *new_entry = NULL, *entry;
+ struct ext4_system_zone *new_entry, *entry;
struct rb_node **n = &sbi->system_blks.rb_node, *node;
struct rb_node *parent = NULL, *new_node = NULL;
@@ -68,30 +70,21 @@ static int add_system_zone(struct ext4_sb_info *sbi,
n = &(*n)->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
- else {
- if (start_blk + count > (entry->start_blk +
- entry->count))
- entry->count = (start_blk + count -
- entry->start_blk);
- new_node = *n;
- new_entry = rb_entry(new_node, struct ext4_system_zone,
- node);
- break;
- }
+ else /* Unexpected overlap of system zones. */
+ return -EFSCORRUPTED;
}
- if (!new_entry) {
- new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
- GFP_KERNEL);
- if (!new_entry)
- return -ENOMEM;
- new_entry->start_blk = start_blk;
- new_entry->count = count;
- new_node = &new_entry->node;
-
- rb_link_node(new_node, parent, n);
- rb_insert_color(new_node, &sbi->system_blks);
- }
+ new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
+ GFP_KERNEL);
+ if (!new_entry)
+ return -ENOMEM;
+ new_entry->start_blk = start_blk;
+ new_entry->count = count;
+ new_entry->ino = ino;
+ new_node = &new_entry->node;
+
+ rb_link_node(new_node, parent, n);
+ rb_insert_color(new_node, &sbi->system_blks);
/* Can we merge to the left? */
node = rb_prev(new_node);
@@ -136,6 +129,50 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
printk("\n");
}
+static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino)
+{
+ struct inode *inode;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_map_blocks map;
+ u32 i = 0, num;
+ int err = 0, n;
+
+ if ((ino < EXT4_ROOT_INO) ||
+ (ino > le32_to_cpu(sbi->s_es->s_inodes_count)))
+ return -EINVAL;
+ inode = ext4_iget(sb, ino, EXT4_IGET_SPECIAL);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+ num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
+ while (i < num) {
+ cond_resched();
+ map.m_lblk = i;
+ map.m_len = num - i;
+ n = ext4_map_blocks(NULL, inode, &map, 0);
+ if (n < 0) {
+ err = n;
+ break;
+ }
+ if (n == 0) {
+ i++;
+ } else {
+ err = add_system_zone(sbi, map.m_pblk, n, ino);
+ if (err < 0) {
+ if (err == -EFSCORRUPTED) {
+ ext4_error(sb,
+ "blocks %llu-%llu from inode %u "
+ "overlap system zone", map.m_pblk,
+ map.m_pblk + map.m_len - 1, ino);
+ }
+ break;
+ }
+ i += n;
+ }
+ }
+ iput(inode);
+ return err;
+}
+
int ext4_setup_system_zone(struct super_block *sb)
{
ext4_group_t ngroups = ext4_get_groups_count(sb);
@@ -157,16 +194,22 @@ int ext4_setup_system_zone(struct super_block *sb)
if (ext4_bg_has_super(sb, i) &&
((i < 5) || ((i % flex_size) == 0)))
add_system_zone(sbi, ext4_group_first_block_no(sb, i),
- ext4_bg_num_gdb(sb, i) + 1);
+ ext4_bg_num_gdb(sb, i) + 1, 0);
gdp = ext4_get_group_desc(sb, i, NULL);
- ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+ ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1, 0);
if (ret)
return ret;
- ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
+ ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1, 0);
if (ret)
return ret;
ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
- sbi->s_itb_per_group);
+ sbi->s_itb_per_group, 0);
+ if (ret)
+ return ret;
+ }
+ if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) {
+ ret = ext4_protect_reserved_inode(sb,
+ le32_to_cpu(sbi->s_es->s_journal_inum));
if (ret)
return ret;
}
@@ -193,10 +236,11 @@ void ext4_release_system_zone(struct super_block *sb)
* start_blk+count) is valid; 0 if some part of the block region
* overlaps with filesystem metadata blocks.
*/
-int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
- unsigned int count)
+int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
+ unsigned int count)
{
struct ext4_system_zone *entry;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct rb_node *n = sbi->system_blks.rb_node;
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
@@ -212,6 +256,8 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right;
else {
+ if (entry->ino == inode->i_ino)
+ return 1;
sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
return 0;
}
@@ -226,11 +272,15 @@ int ext4_check_blockref(const char *function, unsigned int line,
__le32 *bref = p;
unsigned int blk;
+ if (ext4_has_feature_journal(inode->i_sb) &&
+ (inode->i_ino ==
+ le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
+ return 0;
+
while (bref < p+max) {
blk = le32_to_cpu(*bref++);
if (blk &&
- unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
- blk, 1))) {
+ unlikely(!ext4_inode_block_valid(inode, blk, 1))) {
es->s_last_error_block = cpu_to_le64(blk);
ext4_error_inode(inode, function, line, blk,
"invalid block");
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 7b626e942987..96dc313d6251 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -521,7 +521,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
struct dir_private_info *info = file->private_data;
struct inode *inode = file_inode(file);
struct fname *fname;
- int ret;
+ int ret = 0;
if (!info) {
info = ext4_htree_create_dir_info(file, ctx->pos);
@@ -569,7 +569,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
info->curr_minor_hash,
&info->next_hash);
if (ret < 0)
- return ret;
+ goto finished;
if (ret == 0) {
ctx->pos = ext4_get_htree_eof(file);
break;
@@ -600,7 +600,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
}
finished:
info->last_pos = ctx->pos;
- return 0;
+ return ret < 0 ? ret : 0;
}
static int ext4_dir_open(struct inode * inode, struct file * filp)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ab0f08c89d5f..40e3716b4f2e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2398,7 +2398,8 @@ int ext4_insert_dentry(struct inode *dir,
struct ext4_filename *fname);
static inline void ext4_update_dx_flag(struct inode *inode)
{
- if (!ext4_has_feature_dir_index(inode->i_sb)) {
+ if (!ext4_has_feature_dir_index(inode->i_sb) &&
+ ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
/* ext4_iget() should have caught this... */
WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb));
ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
@@ -2497,8 +2498,19 @@ int do_journal_get_write_access(handle_t *handle,
#define FALL_BACK_TO_NONDELALLOC 1
#define CONVERT_INLINE_DATA 2
-extern struct inode *ext4_iget(struct super_block *, unsigned long);
-extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
+typedef enum {
+ EXT4_IGET_NORMAL = 0,
+ EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */
+ EXT4_IGET_HANDLE = 0x0002 /* Inode # is from a handle */
+} ext4_iget_flags;
+
+extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ ext4_iget_flags flags, const char *function,
+ unsigned int line);
+
+#define ext4_iget(sb, ino, flags) \
+ __ext4_iget((sb), (ino), (flags), __func__, __LINE__)
+
extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct dentry *, struct iattr *);
extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -3122,9 +3134,9 @@ extern void ext4_release_system_zone(struct super_block *sb);
extern int ext4_setup_system_zone(struct super_block *sb);
extern int __init ext4_init_system_zone(void);
extern void ext4_exit_system_zone(void);
-extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
- ext4_fsblk_t start_blk,
- unsigned int count);
+extern int ext4_inode_block_valid(struct inode *inode,
+ ext4_fsblk_t start_blk,
+ unsigned int count);
extern int ext4_check_blockref(const char *, unsigned int,
struct inode *, __le32 *, unsigned int);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2d8e73793512..eea4e7547656 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -169,10 +169,13 @@ struct ext4_ext_path {
(EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
#define EXT_LAST_INDEX(__hdr__) \
(EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
-#define EXT_MAX_EXTENT(__hdr__) \
- (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
+#define EXT_MAX_EXTENT(__hdr__) \
+ ((le16_to_cpu((__hdr__)->eh_max)) ? \
+ ((EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) \
+ : 0)
#define EXT_MAX_INDEX(__hdr__) \
- (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
+ ((le16_to_cpu((__hdr__)->eh_max)) ? \
+ ((EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) : 0)
static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
{
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7414bfd5a58e..07ae78ba27a1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -384,7 +384,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
*/
if (lblock + len <= lblock)
return 0;
- return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
+ return ext4_inode_block_valid(inode, block, len);
}
static int ext4_valid_extent_idx(struct inode *inode,
@@ -392,7 +392,7 @@ static int ext4_valid_extent_idx(struct inode *inode,
{
ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
- return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
+ return ext4_inode_block_valid(inode, block, 1);
}
static int ext4_valid_extent_entries(struct inode *inode,
@@ -505,6 +505,30 @@ int ext4_ext_check_inode(struct inode *inode)
return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
}
+static void ext4_cache_extents(struct inode *inode,
+ struct ext4_extent_header *eh)
+{
+ struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
+ ext4_lblk_t prev = 0;
+ int i;
+
+ for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
+ unsigned int status = EXTENT_STATUS_WRITTEN;
+ ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
+ int len = ext4_ext_get_actual_len(ex);
+
+ if (prev && (prev != lblk))
+ ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
+ EXTENT_STATUS_HOLE);
+
+ if (ext4_ext_is_unwritten(ex))
+ status = EXTENT_STATUS_UNWRITTEN;
+ ext4_es_cache_extent(inode, lblk, len,
+ ext4_ext_pblock(ex), status);
+ prev = lblk + len;
+ }
+}
+
static struct buffer_head *
__read_extent_tree_block(const char *function, unsigned int line,
struct inode *inode, ext4_fsblk_t pblk, int depth,
@@ -535,26 +559,7 @@ __read_extent_tree_block(const char *function, unsigned int line,
*/
if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
struct ext4_extent_header *eh = ext_block_hdr(bh);
- struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
- ext4_lblk_t prev = 0;
- int i;
-
- for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
- unsigned int status = EXTENT_STATUS_WRITTEN;
- ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
- int len = ext4_ext_get_actual_len(ex);
-
- if (prev && (prev != lblk))
- ext4_es_cache_extent(inode, prev,
- lblk - prev, ~0,
- EXTENT_STATUS_HOLE);
-
- if (ext4_ext_is_unwritten(ex))
- status = EXTENT_STATUS_UNWRITTEN;
- ext4_es_cache_extent(inode, lblk, len,
- ext4_ext_pblock(ex), status);
- prev = lblk + len;
- }
+ ext4_cache_extents(inode, eh);
}
return bh;
errout:
@@ -860,6 +865,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
eh->eh_entries = 0;
eh->eh_magic = EXT4_EXT_MAGIC;
eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
+ eh->eh_generation = 0;
ext4_mark_inode_dirty(handle, inode);
return 0;
}
@@ -902,6 +908,8 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
path[0].p_bh = NULL;
i = depth;
+ if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
+ ext4_cache_extents(inode, eh);
/* walk through the tree */
while (i) {
ext_debug("depth %d: num %d, max %d\n",
@@ -1121,6 +1129,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
neh->eh_magic = EXT4_EXT_MAGIC;
neh->eh_depth = 0;
+ neh->eh_generation = 0;
/* move remainder of path[depth] to the new leaf */
if (unlikely(path[depth].p_hdr->eh_entries !=
@@ -1198,6 +1207,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
neh->eh_magic = EXT4_EXT_MAGIC;
neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
neh->eh_depth = cpu_to_le16(depth - i);
+ neh->eh_generation = 0;
fidx = EXT_FIRST_INDEX(neh);
fidx->ei_block = border;
ext4_idx_store_pblock(fidx, oldblock);
@@ -2891,7 +2901,7 @@ again:
* in use to avoid freeing it when removing blocks.
*/
if (sbi->s_cluster_ratio > 1) {
- pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
+ pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
partial_cluster =
-(long long) EXT4_B2C(sbi, pblk);
}
@@ -3261,7 +3271,10 @@ static int ext4_split_extent_at(handle_t *handle,
ext4_ext_mark_unwritten(ex2);
err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
- if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+ if (err != -ENOSPC && err != -EDQUOT)
+ goto out;
+
+ if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
if (split_flag & EXT4_EXT_DATA_VALID1) {
err = ext4_ext_zeroout(inode, ex2);
@@ -3287,30 +3300,30 @@ static int ext4_split_extent_at(handle_t *handle,
ext4_ext_pblock(&orig_ex));
}
- if (err)
- goto fix_extent_len;
- /* update the extent length and mark as initialized */
- ex->ee_len = cpu_to_le16(ee_len);
- ext4_ext_try_to_merge(handle, inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + path->p_depth);
- if (err)
- goto fix_extent_len;
-
- /* update extent status tree */
- err = ext4_zeroout_es(inode, &zero_ex);
-
- goto out;
- } else if (err)
- goto fix_extent_len;
-
-out:
- ext4_ext_show_leaf(inode, path);
- return err;
+ if (!err) {
+ /* update the extent length and mark as initialized */
+ ex->ee_len = cpu_to_le16(ee_len);
+ ext4_ext_try_to_merge(handle, inode, path, ex);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+ if (!err)
+ /* update extent status tree */
+ err = ext4_zeroout_es(inode, &zero_ex);
+ /* If we failed at this point, we don't know in which
+ * state the extent tree exactly is so don't try to fix
+ * length of the original extent as it may do even more
+ * damage.
+ */
+ goto out;
+ }
+ }
fix_extent_len:
ex->ee_len = orig_ex.ee_len;
ext4_ext_dirty(handle, inode, path + path->p_depth);
return err;
+out:
+ ext4_ext_show_leaf(inode, path);
+ return err;
}
/*
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index ac748b3af1c1..665cf30c95e9 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -1080,11 +1080,9 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
- if (!nr_to_scan)
- return ret;
-
nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
+ ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
return nr_shrunk;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index c31b05f0bd69..685a26e9540f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -405,7 +405,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
*
* We always try to spread first-level directories.
*
- * If there are blockgroups with both free inodes and free blocks counts
+ * If there are blockgroups with both free inodes and free clusters counts
* not worse than average we return one with smallest directory count.
* Otherwise we simply return a random group.
*
@@ -414,7 +414,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
* It's OK to put directory into a group unless
* it has too many directories already (max_dirs) or
* it has too few free inodes left (min_inodes) or
- * it has too few free blocks left (min_blocks) or
+ * it has too few free clusters left (min_clusters) or
* Parent's group is preferred, if it doesn't satisfy these
* conditions we search cyclically through the rest. If none
* of the groups look good we just look for a group with more
@@ -430,7 +430,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4_group_t real_ngroups = ext4_get_groups_count(sb);
int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
unsigned int freei, avefreei, grp_free;
- ext4_fsblk_t freeb, avefreec;
+ ext4_fsblk_t freec, avefreec;
unsigned int ndirs;
int max_dirs, min_inodes;
ext4_grpblk_t min_clusters;
@@ -449,9 +449,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
avefreei = freei / ngroups;
- freeb = EXT4_C2B(sbi,
- percpu_counter_read_positive(&sbi->s_freeclusters_counter));
- avefreec = freeb;
+ freec = percpu_counter_read_positive(&sbi->s_freeclusters_counter);
+ avefreec = freec;
do_div(avefreec, ngroups);
ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
@@ -1150,7 +1149,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
if (!ext4_test_bit(bit, bitmap_bh->b_data))
goto bad_orphan;
- inode = ext4_iget(sb, ino);
+ inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
ext4_error(sb, "couldn't read orphan inode %lu (err %d)",
@@ -1278,6 +1277,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
handle_t *handle;
ext4_fsblk_t blk;
int num, ret = 0, used_blks = 0;
+ unsigned long used_inos = 0;
/* This should not happen, but just to be sure check this */
if (sb->s_flags & MS_RDONLY) {
@@ -1308,22 +1308,37 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
* used inodes so we need to skip blocks with used inodes in
* inode table.
*/
- if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
- used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
- ext4_itable_unused_count(sb, gdp)),
- sbi->s_inodes_per_block);
-
- if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) ||
- ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) -
- ext4_itable_unused_count(sb, gdp)) <
- EXT4_FIRST_INO(sb)))) {
- ext4_error(sb, "Something is wrong with group %u: "
- "used itable blocks: %d; "
- "itable unused count: %u",
- group, used_blks,
- ext4_itable_unused_count(sb, gdp));
- ret = 1;
- goto err_out;
+ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) {
+ used_inos = EXT4_INODES_PER_GROUP(sb) -
+ ext4_itable_unused_count(sb, gdp);
+ used_blks = DIV_ROUND_UP(used_inos, sbi->s_inodes_per_block);
+
+ /* Bogus inode unused count? */
+ if (used_blks < 0 || used_blks > sbi->s_itb_per_group) {
+ ext4_error(sb, "Something is wrong with group %u: "
+ "used itable blocks: %d; "
+ "itable unused count: %u",
+ group, used_blks,
+ ext4_itable_unused_count(sb, gdp));
+ ret = 1;
+ goto err_out;
+ }
+
+ used_inos += group * EXT4_INODES_PER_GROUP(sb);
+ /*
+ * Are there some uninitialized inodes in the inode table
+ * before the first normal inode?
+ */
+ if ((used_blks != sbi->s_itb_per_group) &&
+ (used_inos < EXT4_FIRST_INO(sb))) {
+ ext4_error(sb, "Something is wrong with group %u: "
+ "itable unused count: %u; "
+ "itables initialized count: %ld",
+ group, ext4_itable_unused_count(sb, gdp),
+ used_inos);
+ ret = 1;
+ goto err_out;
+ }
}
blk = ext4_inode_table(sb, gdp) + used_blks;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 08f3a0c0f468..4f610cd8041b 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -946,8 +946,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
else if (ext4_should_journal_data(inode))
flags |= EXT4_FREE_BLOCKS_FORGET;
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
- count)) {
+ if (!ext4_inode_block_valid(inode, block_to_free, count)) {
EXT4_ERROR_INODE(inode, "attempt to clear invalid "
"blocks %llu len %lu",
(unsigned long long) block_to_free, count);
@@ -1109,8 +1108,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
if (!nr)
continue; /* A hole */
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
- nr, 1)) {
+ if (!ext4_inode_block_valid(inode, nr, 1)) {
EXT4_ERROR_INODE(inode,
"invalid indirect mapped "
"block %lu (level %d)",
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 00f9433eea23..6f5e292e86f7 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -746,6 +746,12 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
ext4_write_lock_xattr(inode, &no_expand);
BUG_ON(!ext4_has_inline_data(inode));
+ /*
+ * ei->i_inline_off may have changed since ext4_write_begin()
+ * called ext4_try_to_write_inline_data()
+ */
+ (void) ext4_find_inline_data_nolock(inode);
+
kaddr = kmap_atomic(page);
ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
kunmap_atomic(kaddr);
@@ -1892,6 +1898,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
ext4_write_lock_xattr(inode, &no_expand);
if (!ext4_has_inline_data(inode)) {
+ ext4_write_unlock_xattr(inode, &no_expand);
*has_inline = 0;
ext4_journal_stop(handle);
return;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a2bb9c60445c..6551f08e89a7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -377,8 +377,11 @@ static int __check_block_validity(struct inode *inode, const char *func,
unsigned int line,
struct ext4_map_blocks *map)
{
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
- map->m_len)) {
+ if (ext4_has_feature_journal(inode->i_sb) &&
+ (inode->i_ino ==
+ le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
+ return 0;
+ if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) {
ext4_error_inode(inode, func, line, map->m_pblk,
"lblock %lu mapped to illegal pblock %llu "
"(length %d)", (unsigned long) map->m_lblk,
@@ -1821,13 +1824,13 @@ static int __ext4_journalled_writepage(struct page *page,
if (!ret)
ret = err;
- if (!ext4_has_inline_data(inode))
- ext4_walk_page_buffers(NULL, page_bufs, 0, len,
- NULL, bput_one);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
out:
unlock_page(page);
out_no_pagelock:
+ if (!inline_data && page_bufs)
+ ext4_walk_page_buffers(NULL, page_bufs, 0, len,
+ NULL, bput_one);
brelse(inode_bh);
return ret;
}
@@ -3349,6 +3352,13 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
size_t count = iov_iter_count(iter);
ssize_t ret;
+ if (iov_iter_rw(iter) == READ) {
+ loff_t size = i_size_read(inode);
+
+ if (offset >= size)
+ return 0;
+ }
+
#ifdef CONFIG_EXT4_FS_ENCRYPTION
if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
return 0;
@@ -4214,7 +4224,9 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
EXT4_I(inode)->i_inline_off = 0;
}
-struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
+struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
+ ext4_iget_flags flags, const char *function,
+ unsigned int line)
{
struct ext4_iloc iloc;
struct ext4_inode *raw_inode;
@@ -4227,6 +4239,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
uid_t i_uid;
gid_t i_gid;
+ if ((!(flags & EXT4_IGET_SPECIAL) &&
+ (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) ||
+ (ino < EXT4_ROOT_INO) ||
+ (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) {
+ if (flags & EXT4_IGET_HANDLE)
+ return ERR_PTR(-ESTALE);
+ __ext4_error(sb, function, line,
+ "inode #%lu: comm %s: iget: illegal inode #",
+ ino, current->comm);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
+
inode = iget_locked(sb, ino);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -4242,11 +4266,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
raw_inode = ext4_raw_inode(&iloc);
if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
- EXT4_ERROR_INODE(inode, "root inode unallocated");
+ ext4_error_inode(inode, function, line, 0,
+ "iget: root inode unallocated");
ret = -EFSCORRUPTED;
goto bad_inode;
}
+ if ((flags & EXT4_IGET_HANDLE) &&
+ (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
+ ret = -ESTALE;
+ goto bad_inode;
+ }
+
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
@@ -4273,7 +4304,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
}
if (!ext4_inode_csum_verify(inode, raw_inode, ei)) {
- EXT4_ERROR_INODE(inode, "checksum invalid");
+ ext4_error_inode(inode, function, line, 0,
+ "iget: checksum invalid");
ret = -EFSBADCRC;
goto bad_inode;
}
@@ -4321,7 +4353,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
inode->i_size = ext4_isize(raw_inode);
if ((size = i_size_read(inode)) < 0) {
- EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bad i_size value: %lld", size);
ret = -EFSCORRUPTED;
goto bad_inode;
}
@@ -4403,8 +4436,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ret = 0;
if (ei->i_file_acl &&
- !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
- EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
+ !ext4_inode_block_valid(inode, ei->i_file_acl, 1)) {
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bad extended attribute block %llu",
ei->i_file_acl);
ret = -EFSCORRUPTED;
goto bad_inode;
@@ -4459,7 +4493,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
make_bad_inode(inode);
} else {
ret = -EFSCORRUPTED;
- EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
+ ext4_error_inode(inode, function, line, 0,
+ "iget: bogus i_mode (%o)", inode->i_mode);
goto bad_inode;
}
brelse(iloc.bh);
@@ -4473,13 +4508,6 @@ bad_inode:
return ERR_PTR(ret);
}
-struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
-{
- if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
- return ERR_PTR(-EFSCORRUPTED);
- return ext4_iget(sb, ino);
-}
-
static int ext4_inode_blocks_set(handle_t *handle,
struct ext4_inode *raw_inode,
struct ext4_inode_info *ei)
@@ -4598,7 +4626,7 @@ static int ext4_do_update_inode(handle_t *handle,
struct ext4_inode_info *ei = EXT4_I(inode);
struct buffer_head *bh = iloc->bh;
struct super_block *sb = inode->i_sb;
- int err = 0, rc, block;
+ int err = 0, block;
int need_datasync = 0, set_large_file = 0;
uid_t i_uid;
gid_t i_gid;
@@ -4698,9 +4726,9 @@ static int ext4_do_update_inode(handle_t *handle,
bh->b_data);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- rc = ext4_handle_dirty_metadata(handle, NULL, bh);
- if (!err)
- err = rc;
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ goto out_brelse;
ext4_clear_inode_state(inode, EXT4_STATE_NEW);
if (set_large_file) {
BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access");
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index b02f210ea617..84f8d07302ef 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -105,7 +105,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN))
return -EPERM;
- inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO);
+ inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(inode_bl))
return PTR_ERR(inode_bl);
ei_bl = EXT4_I(inode_bl);
@@ -675,7 +675,10 @@ encryption_policy_out:
err = ext4_journal_get_write_access(handle, sbi->s_sbh);
if (err)
goto pwsalt_err_journal;
+ lock_buffer(sbi->s_sbh);
generate_random_uuid(sbi->s_es->s_encrypt_pw_salt);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(sbi->s_sbh);
err = ext4_handle_dirty_metadata(handle, NULL,
sbi->s_sbh);
pwsalt_err_journal:
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index fda49f4c5a8e..ac87f7e5d6a4 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1944,7 +1944,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
int free;
free = e4b->bd_info->bb_free;
- BUG_ON(free <= 0);
+ if (WARN_ON(free <= 0))
+ return;
i = e4b->bd_info->bb_first_free;
@@ -1965,7 +1966,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
}
mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
- BUG_ON(ex.fe_len <= 0);
+ if (WARN_ON(ex.fe_len <= 0))
+ break;
if (free < ex.fe_len) {
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
"%d free clusters as per "
@@ -2958,7 +2960,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- if (!ext4_data_block_valid(sbi, block, len)) {
+ if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
"fs metadata", block, block+len);
/* File system mounted not to panic on error
@@ -4644,6 +4646,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
ext4_group_first_block_no(sb, group) +
EXT4_C2B(sbi, cluster),
"Block already on to-be-freed list");
+ kmem_cache_free(ext4_free_data_cachep, new_entry);
return 0;
}
}
@@ -4715,7 +4718,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
sbi = EXT4_SB(sb);
if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
- !ext4_data_block_valid(sbi, block, count)) {
+ !ext4_inode_block_valid(inode, block, count)) {
ext4_error(sb, "Freeing blocks not in datazone - "
"block = %llu, count = %lu", block, count);
goto error_return;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 49e0d97b2ee7..8cd2a7e1eef1 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1226,19 +1226,18 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
}
/*
- * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
+ * Test whether a directory entry matches the filename being searched for.
*
- * `len <= EXT4_NAME_LEN' is guaranteed by caller.
- * `de != NULL' is guaranteed by caller.
+ * Return: %true if the directory entry matches, otherwise %false.
*/
-static inline int ext4_match(struct ext4_filename *fname,
- struct ext4_dir_entry_2 *de)
+static inline bool ext4_match(const struct ext4_filename *fname,
+ const struct ext4_dir_entry_2 *de)
{
const void *name = fname_name(fname);
u32 len = fname_len(fname);
if (!de->inode)
- return 0;
+ return false;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
if (unlikely(!name)) {
@@ -1270,48 +1269,31 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
struct ext4_dir_entry_2 * de;
char * dlimit;
int de_len;
- int res;
de = (struct ext4_dir_entry_2 *)search_buf;
dlimit = search_buf + buf_size;
while ((char *) de < dlimit) {
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
- if ((char *) de + de->name_len <= dlimit) {
- res = ext4_match(fname, de);
- if (res < 0) {
- res = -1;
- goto return_result;
- }
- if (res > 0) {
- /* found a match - just to be sure, do
- * a full check */
- if (ext4_check_dir_entry(dir, NULL, de, bh,
- bh->b_data,
- bh->b_size, offset)) {
- res = -1;
- goto return_result;
- }
- *res_dir = de;
- res = 1;
- goto return_result;
- }
-
+ if ((char *) de + de->name_len <= dlimit &&
+ ext4_match(fname, de)) {
+ /* found a match - just to be sure, do
+ * a full check */
+ if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
+ buf_size, offset))
+ return -1;
+ *res_dir = de;
+ return 1;
}
/* prevent looping on a bad block */
de_len = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
- if (de_len <= 0) {
- res = -1;
- goto return_result;
- }
+ if (de_len <= 0)
+ return -1;
offset += de_len;
de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
}
-
- res = 0;
-return_result:
- return res;
+ return 0;
}
static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
@@ -1601,7 +1583,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
dentry);
return ERR_PTR(-EFSCORRUPTED);
}
- inode = ext4_iget_normal(dir->i_sb, ino);
+ inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL);
if (inode == ERR_PTR(-ESTALE)) {
EXT4_ERROR_INODE(dir,
"deleted inode referenced: %u",
@@ -1646,7 +1628,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
return ERR_PTR(-EFSCORRUPTED);
}
- return d_obtain_alias(ext4_iget_normal(d_inode(child)->i_sb, ino));
+ return d_obtain_alias(ext4_iget(d_inode(child)->i_sb, ino, EXT4_IGET_NORMAL));
}
/*
@@ -1748,7 +1730,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
blocksize, hinfo, map);
map -= count;
dx_sort_map(map, count);
- /* Split the existing block in the middle, size-wise */
+ /* Ensure that neither split block is over half full */
size = 0;
move = 0;
for (i = count-1; i >= 0; i--) {
@@ -1758,8 +1740,18 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
size += map[i].size;
move++;
}
- /* map index at which we will split */
- split = count - move;
+ /*
+ * map index at which we will split
+ *
+ * If the sum of active entries didn't exceed half the block size, just
+ * split it in half by count; each resulting block will have at least
+ * half the space free.
+ */
+ if (i > 0)
+ split = count - move;
+ else
+ split = count/2;
+
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
@@ -1824,24 +1816,15 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
int nlen, rlen;
unsigned int offset = 0;
char *top;
- int res;
de = (struct ext4_dir_entry_2 *)buf;
top = buf + buf_size - reclen;
while ((char *) de <= top) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
- buf, buf_size, offset)) {
- res = -EFSCORRUPTED;
- goto return_result;
- }
- /* Provide crypto context and crypto buffer to ext4 match */
- res = ext4_match(fname, de);
- if (res < 0)
- goto return_result;
- if (res > 0) {
- res = -EEXIST;
- goto return_result;
- }
+ buf, buf_size, offset))
+ return -EFSCORRUPTED;
+ if (ext4_match(fname, de))
+ return -EEXIST;
nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
if ((de->inode ? rlen - nlen : rlen) >= reclen)
@@ -1849,15 +1832,11 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
offset += rlen;
}
-
if ((char *) de > top)
- res = -ENOSPC;
- else {
- *dest_de = de;
- res = 0;
- }
-return_result:
- return res;
+ return -ENOSPC;
+
+ *dest_de = de;
+ return 0;
}
int ext4_insert_dentry(struct inode *dir,
@@ -2343,7 +2322,7 @@ int ext4_generic_delete_entry(handle_t *handle,
de = (struct ext4_dir_entry_2 *)entry_buf;
while (i < buf_size - csum_size) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
- bh->b_data, bh->b_size, i))
+ entry_buf, buf_size, i))
return -EFSCORRUPTED;
if (de == de_del) {
if (pde)
@@ -3239,7 +3218,7 @@ static int ext4_link(struct dentry *old_dentry,
return -EMLINK;
if (ext4_encrypted_inode(dir) &&
!ext4_is_child_context_consistent_with_parent(dir, inode))
- return -EPERM;
+ return -EXDEV;
err = dquot_initialize(dir);
if (err)
return err;
@@ -3392,12 +3371,35 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
return retval;
}
}
- brelse(ent->bh);
- ent->bh = NULL;
return 0;
}
+static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
+ unsigned ino, unsigned file_type)
+{
+ struct ext4_renament old = *ent;
+ int retval = 0;
+
+ /*
+ * old->de could have moved from under us during make indexed dir,
+ * so the old->de may no longer valid and need to find it again
+ * before reset old inode info.
+ */
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ if (IS_ERR(old.bh))
+ retval = PTR_ERR(old.bh);
+ if (!old.bh)
+ retval = -ENOENT;
+ if (retval) {
+ ext4_std_error(old.dir->i_sb, retval);
+ return;
+ }
+
+ ext4_setent(handle, &old, ino, file_type);
+ brelse(old.bh);
+}
+
static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
const struct qstr *d_name)
{
@@ -3552,14 +3554,14 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
retval = -ENOENT;
if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
- goto end_rename;
+ goto release_bh;
if ((old.dir != new.dir) &&
ext4_encrypted_inode(new.dir) &&
!ext4_is_child_context_consistent_with_parent(new.dir,
old.inode)) {
- retval = -EPERM;
- goto end_rename;
+ retval = -EXDEV;
+ goto release_bh;
}
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
@@ -3567,7 +3569,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (IS_ERR(new.bh)) {
retval = PTR_ERR(new.bh);
new.bh = NULL;
- goto end_rename;
+ goto release_bh;
}
if (new.bh) {
if (!new.inode) {
@@ -3584,18 +3586,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
- handle = NULL;
- goto end_rename;
+ goto release_bh;
}
} else {
whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
if (IS_ERR(whiteout)) {
retval = PTR_ERR(whiteout);
- whiteout = NULL;
- goto end_rename;
+ goto release_bh;
}
}
+ old_file_type = old.de->file_type;
if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
ext4_handle_sync(handle);
@@ -3623,7 +3624,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
force_reread = (new.dir->i_ino == old.dir->i_ino &&
ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
- old_file_type = old.de->file_type;
if (whiteout) {
/*
* Do this before adding a new entry, so the old entry is sure
@@ -3695,17 +3695,23 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = 0;
end_rename:
- brelse(old.dir_bh);
- brelse(old.bh);
- brelse(new.bh);
if (whiteout) {
- if (retval)
+ if (retval) {
+ ext4_resetent(handle, &old,
+ old.inode->i_ino, old_file_type);
drop_nlink(whiteout);
+ ext4_orphan_add(handle, whiteout);
+ }
unlock_new_inode(whiteout);
+ ext4_journal_stop(handle);
iput(whiteout);
- }
- if (handle)
+ } else {
ext4_journal_stop(handle);
+ }
+release_bh:
+ brelse(old.dir_bh);
+ brelse(old.bh);
+ brelse(new.bh);
return retval;
}
@@ -3739,7 +3745,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
old.inode) ||
!ext4_is_child_context_consistent_with_parent(old_dir,
new.inode)))
- return -EPERM;
+ return -EXDEV;
retval = dquot_initialize(old.dir);
if (retval)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f5b6667b0ab0..7ed01bcc2419 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1649,7 +1649,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
"No reserved GDT blocks, can't resize");
return -EPERM;
}
- inode = ext4_iget(sb, EXT4_RESIZE_INO);
+ inode = ext4_iget(sb, EXT4_RESIZE_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(inode);
@@ -1977,7 +1977,8 @@ retry:
}
if (!resize_inode)
- resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
+ resize_inode = ext4_iget(sb, EXT4_RESIZE_INO,
+ EXT4_IGET_SPECIAL);
if (IS_ERR(resize_inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(resize_inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 67faf147bade..1de02b90a1ef 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1022,20 +1022,11 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
{
struct inode *inode;
- if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
- return ERR_PTR(-ESTALE);
- if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
- return ERR_PTR(-ESTALE);
-
- /* iget isn't really right if the inode is currently unallocated!!
- *
- * ext4_read_inode will return a bad_inode if the inode had been
- * deleted, so we should be safe.
- *
+ /*
* Currently we don't know the generation for parent directory, so
* a generation of 0 means "accept any"
*/
- inode = ext4_iget_normal(sb, ino);
+ inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (generation && inode->i_generation != generation) {
@@ -1461,8 +1452,8 @@ static const struct mount_opts {
MOPT_SET | MOPT_Q},
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
- {Opt_usrjquota, 0, MOPT_Q},
- {Opt_grpjquota, 0, MOPT_Q},
+ {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
+ {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_offusrjquota, 0, MOPT_Q},
{Opt_offgrpjquota, 0, MOPT_Q},
{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
@@ -4036,7 +4027,7 @@ no_journal:
* so we can safely mount the rest of the filesystem now.
*/
- root = ext4_iget(sb, EXT4_ROOT_INO);
+ root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(root)) {
ext4_msg(sb, KERN_ERR, "get root inode failed");
ret = PTR_ERR(root);
@@ -4177,6 +4168,7 @@ cantfind_ext4:
#ifdef CONFIG_QUOTA
failed_mount8:
ext4_unregister_sysfs(sb);
+ kobject_put(&sbi->s_kobj);
#endif
failed_mount7:
ext4_unregister_li_request(sb);
@@ -4273,11 +4265,12 @@ static journal_t *ext4_get_journal(struct super_block *sb,
BUG_ON(!ext4_has_feature_journal(sb));
- /* First, test for the existence of a valid inode on disk. Bad
- * things happen if we iget() an unused inode, as the subsequent
- * iput() will try to delete it. */
-
- journal_inode = ext4_iget(sb, journal_inum);
+ /*
+ * Test for the existence of a valid inode on disk. Bad things
+ * happen if we iget() an unused inode, as the subsequent iput()
+ * will try to delete it.
+ */
+ journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
if (IS_ERR(journal_inode)) {
ext4_msg(sb, KERN_ERR, "no journal found");
return NULL;
@@ -4501,8 +4494,10 @@ static int ext4_commit_super(struct super_block *sb, int sync)
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;
- if (!sbh || block_device_ejected(sb))
- return error;
+ if (!sbh)
+ return -EINVAL;
+ if (block_device_ejected(sb))
+ return -ENODEV;
/*
* The superblock bh should be mapped, but it might not be if the
@@ -4975,7 +4970,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
ext4_register_li_request(sb, first_not_zeroed);
}
- ext4_setup_system_zone(sb);
+ err = ext4_setup_system_zone(sb);
+ if (err)
+ goto restore_opts;
+
if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
ext4_commit_super(sb, 1);
@@ -5193,6 +5191,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
/* Quotafile not on the same filesystem? */
if (path->dentry->d_sb != sb)
return -EXDEV;
+
+ /* Quota already enabled for this file? */
+ if (IS_NOQUOTA(d_inode(path->dentry)))
+ return -EBUSY;
+
/* Journaling quota? */
if (EXT4_SB(sb)->s_qf_names[type]) {
/* Quotafile not in fs root? */
@@ -5241,7 +5244,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
if (!qf_inums[type])
return -EPERM;
- qf_inode = ext4_iget(sb, qf_inums[type]);
+ qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
if (IS_ERR(qf_inode)) {
ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
return PTR_ERR(qf_inode);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 53679716baca..18b9213ce0bd 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -139,31 +139,26 @@ static __le32 ext4_xattr_block_csum(struct inode *inode,
}
static int ext4_xattr_block_csum_verify(struct inode *inode,
- sector_t block_nr,
- struct ext4_xattr_header *hdr)
+ struct buffer_head *bh)
{
- if (ext4_has_metadata_csum(inode->i_sb) &&
- (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
- return 0;
- return 1;
-}
-
-static void ext4_xattr_block_csum_set(struct inode *inode,
- sector_t block_nr,
- struct ext4_xattr_header *hdr)
-{
- if (!ext4_has_metadata_csum(inode->i_sb))
- return;
+ struct ext4_xattr_header *hdr = BHDR(bh);
+ int ret = 1;
- hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
+ if (ext4_has_metadata_csum(inode->i_sb)) {
+ lock_buffer(bh);
+ ret = (hdr->h_checksum == ext4_xattr_block_csum(inode,
+ bh->b_blocknr, hdr));
+ unlock_buffer(bh);
+ }
+ return ret;
}
-static inline int ext4_handle_dirty_xattr_block(handle_t *handle,
- struct inode *inode,
- struct buffer_head *bh)
+static void ext4_xattr_block_csum_set(struct inode *inode,
+ struct buffer_head *bh)
{
- ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh));
- return ext4_handle_dirty_metadata(handle, inode, bh);
+ if (ext4_has_metadata_csum(inode->i_sb))
+ BHDR(bh)->h_checksum = ext4_xattr_block_csum(inode,
+ bh->b_blocknr, BHDR(bh));
}
static inline const struct xattr_handler *
@@ -226,7 +221,7 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
if (buffer_verified(bh))
return 0;
- if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
+ if (!ext4_xattr_block_csum_verify(inode, bh))
return -EFSBADCRC;
error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
bh->b_data);
@@ -590,23 +585,23 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
if (ce)
mb_cache_entry_release(ce);
+
+ ext4_xattr_block_csum_set(inode, bh);
/*
* Beware of this ugliness: Releasing of xattr block references
* from different inodes can race and so we have to protect
* from a race where someone else frees the block (and releases
* its journal_head) before we are done dirtying the buffer. In
* nojournal mode this race is harmless and we actually cannot
- * call ext4_handle_dirty_xattr_block() with locked buffer as
+ * call ext4_handle_dirty_metadata() with locked buffer as
* that function can call sync_dirty_buffer() so for that case
* we handle the dirtying after unlocking the buffer.
*/
if (ext4_handle_valid(handle))
- error = ext4_handle_dirty_xattr_block(handle, inode,
- bh);
+ error = ext4_handle_dirty_metadata(handle, inode, bh);
unlock_buffer(bh);
if (!ext4_handle_valid(handle))
- error = ext4_handle_dirty_xattr_block(handle, inode,
- bh);
+ error = ext4_handle_dirty_metadata(handle, inode, bh);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
@@ -837,13 +832,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
ext4_xattr_rehash(header(s->base),
s->here);
}
+ ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
if (error == -EFSCORRUPTED)
goto bad_block;
if (!error)
- error = ext4_handle_dirty_xattr_block(handle,
- inode,
- bs->bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode,
+ bs->bh);
if (error)
goto cleanup;
goto inserted;
@@ -912,10 +908,11 @@ inserted:
le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
ea_bdebug(new_bh, "reusing; refcount now=%d",
le32_to_cpu(BHDR(new_bh)->h_refcount));
+ ext4_xattr_block_csum_set(inode, new_bh);
unlock_buffer(new_bh);
- error = ext4_handle_dirty_xattr_block(handle,
- inode,
- new_bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode,
+ new_bh);
if (error)
goto cleanup_dquot;
}
@@ -965,11 +962,12 @@ getblk_failed:
goto getblk_failed;
}
memcpy(new_bh->b_data, s->base, new_bh->b_size);
+ ext4_xattr_block_csum_set(inode, new_bh);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
- error = ext4_handle_dirty_xattr_block(handle,
- inode, new_bh);
+ error = ext4_handle_dirty_metadata(handle, inode,
+ new_bh);
if (error)
goto cleanup;
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 4b2f609f376d..047da0f10527 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -188,6 +188,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blkno * NAT_ENTRY_PER_BLOCK);
break;
case META_SIT:
+ if (unlikely(blkno >= TOTAL_SEGS(sbi)))
+ goto out;
/* get sit block addr */
fio.blk_addr = current_sit_addr(sbi,
blkno * SIT_ENTRY_PER_BLOCK);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 92a240616f52..c1130914d6ed 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -805,19 +805,23 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
+ /* check memory boundary before moving forward */
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+ if (unlikely(bit_pos > d->max ||
+ le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) {
+ f2fs_msg(F2FS_I_SB(d->inode)->sb, KERN_WARNING,
+ "%s: corrupted namelen=%d, run fsck to fix.",
+ __func__, le16_to_cpu(de->name_len));
+ set_sbi_flag(F2FS_I_SB(d->inode)->sb->s_fs_info, SBI_NEED_FSCK);
+ return -EINVAL;
+ }
+
if (f2fs_encrypted_inode(d->inode)) {
int save_len = fstr->len;
int ret;
- de_name.name = kmalloc(de_name.len, GFP_NOFS);
- if (!de_name.name)
- return false;
-
- memcpy(de_name.name, d->filename[bit_pos], de_name.len);
-
ret = f2fs_fname_disk_to_usr(d->inode, &de->hash_code,
&de_name, fstr);
- kfree(de_name.name);
if (ret < 0)
return true;
@@ -829,7 +833,6 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
le32_to_cpu(de->ino), d_type))
return true;
- bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
ctx->pos = start_pos + bit_pos;
}
return false;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index bee3bc7a16ac..09185ce2493c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -666,7 +666,8 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr)
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+ if (!in_group_p(inode->i_gid) &&
+ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
mode &= ~S_ISGID;
set_acl_inode(fi, mode);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e5553cd8fe4e..1475a00ae7c8 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -169,7 +169,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
if (f2fs_encrypted_inode(dir) &&
!f2fs_is_child_context_consistent_with_parent(dir, inode))
- return -EPERM;
+ return -EXDEV;
f2fs_balance_fs(sbi);
@@ -597,7 +597,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
!f2fs_is_child_context_consistent_with_parent(new_dir,
old_inode)) {
- err = -EPERM;
+ err = -EXDEV;
goto out;
}
@@ -758,7 +758,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode) ||
!f2fs_is_child_context_consistent_with_parent(old_dir,
new_inode)))
- return -EPERM;
+ return -EXDEV;
f2fs_balance_fs(sbi);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 5e87b9aa7ba6..944fff1ef536 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1422,6 +1422,12 @@ static int fat_read_bpb(struct super_block *sb, struct fat_boot_sector *b,
goto out;
}
+ if (bpb->fat_fat_length == 0 && bpb->fat32_length == 0) {
+ if (!silent)
+ fat_msg(sb, KERN_ERR, "bogus number of FAT sectors");
+ goto out;
+ }
+
error = 0;
out:
diff --git a/fs/file.c b/fs/file.c
index 7e9eb65a2912..090015401c55 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -88,7 +88,7 @@ static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
*/
static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
{
- unsigned int cpy, set;
+ size_t cpy, set;
BUG_ON(nfdt->max_fds < ofdt->max_fds);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 80ea03034017..0ce7ff7a2ce8 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -45,7 +45,6 @@ struct wb_completion {
struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
- unsigned long *older_than_this;
enum writeback_sync_modes sync_mode;
unsigned int tagged_writepages:1;
unsigned int for_kupdate:1;
@@ -160,7 +159,9 @@ static void inode_io_list_del_locked(struct inode *inode,
struct bdi_writeback *wb)
{
assert_spin_locked(&wb->list_lock);
+ assert_spin_locked(&inode->i_lock);
+ inode->i_state &= ~I_SYNC_QUEUED;
list_del_init(&inode->i_io_list);
wb_io_lists_depopulated(wb);
}
@@ -269,6 +270,7 @@ void __inode_attach_wb(struct inode *inode, struct page *page)
if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
wb_put(wb);
}
+EXPORT_SYMBOL_GPL(__inode_attach_wb);
/**
* locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
@@ -510,9 +512,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
/* find and pin the new wb */
rcu_read_lock();
memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
- if (memcg_css)
- isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
+ if (memcg_css && !css_tryget(memcg_css))
+ memcg_css = NULL;
rcu_read_unlock();
+ if (!memcg_css)
+ goto out_free;
+
+ isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
+ css_put(memcg_css);
if (!isw->new_wb)
goto out_free;
@@ -1033,7 +1040,9 @@ void inode_io_list_del(struct inode *inode)
struct bdi_writeback *wb;
wb = inode_to_wb_and_lock_list(inode);
+ spin_lock(&inode->i_lock);
inode_io_list_del_locked(inode, wb);
+ spin_unlock(&inode->i_lock);
spin_unlock(&wb->list_lock);
}
@@ -1046,8 +1055,10 @@ void inode_io_list_del(struct inode *inode)
* the case then the inode must have been redirtied while it was being written
* out and we don't reset its dirtied_when.
*/
-static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
+static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
{
+ assert_spin_locked(&inode->i_lock);
+
if (!list_empty(&wb->b_dirty)) {
struct inode *tail;
@@ -1056,6 +1067,14 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
inode->dirtied_when = jiffies;
}
inode_io_list_move_locked(inode, wb, &wb->b_dirty);
+ inode->i_state &= ~I_SYNC_QUEUED;
+}
+
+static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
+{
+ spin_lock(&inode->i_lock);
+ redirty_tail_locked(inode, wb);
+ spin_unlock(&inode->i_lock);
}
/*
@@ -1094,16 +1113,13 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
#define EXPIRE_DIRTY_ATIME 0x0001
/*
- * Move expired (dirtied before work->older_than_this) dirty inodes from
+ * Move expired (dirtied before dirtied_before) dirty inodes from
* @delaying_queue to @dispatch_queue.
*/
static int move_expired_inodes(struct list_head *delaying_queue,
struct list_head *dispatch_queue,
- int flags,
- struct wb_writeback_work *work)
+ int flags, unsigned long dirtied_before)
{
- unsigned long *older_than_this = NULL;
- unsigned long expire_time;
LIST_HEAD(tmp);
struct list_head *pos, *node;
struct super_block *sb = NULL;
@@ -1111,21 +1127,17 @@ static int move_expired_inodes(struct list_head *delaying_queue,
int do_sb_sort = 0;
int moved = 0;
- if ((flags & EXPIRE_DIRTY_ATIME) == 0)
- older_than_this = work->older_than_this;
- else if (!work->for_sync) {
- expire_time = jiffies - (dirtytime_expire_interval * HZ);
- older_than_this = &expire_time;
- }
while (!list_empty(delaying_queue)) {
inode = wb_inode(delaying_queue->prev);
- if (older_than_this &&
- inode_dirtied_after(inode, *older_than_this))
+ if (inode_dirtied_after(inode, dirtied_before))
break;
list_move(&inode->i_io_list, &tmp);
moved++;
+ spin_lock(&inode->i_lock);
if (flags & EXPIRE_DIRTY_ATIME)
- set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
+ inode->i_state |= I_DIRTY_TIME_EXPIRED;
+ inode->i_state |= I_SYNC_QUEUED;
+ spin_unlock(&inode->i_lock);
if (sb_is_blkdev_sb(inode->i_sb))
continue;
if (sb && sb != inode->i_sb)
@@ -1163,18 +1175,22 @@ out:
* |
* +--> dequeue for IO
*/
-static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
+static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
+ unsigned long dirtied_before)
{
int moved;
+ unsigned long time_expire_jif = dirtied_before;
assert_spin_locked(&wb->list_lock);
list_splice_init(&wb->b_more_io, &wb->b_io);
- moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
+ moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, dirtied_before);
+ if (!work->for_sync)
+ time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
- EXPIRE_DIRTY_ATIME, work);
+ EXPIRE_DIRTY_ATIME, time_expire_jif);
if (moved)
wb_io_lists_populated(wb);
- trace_writeback_queue_io(wb, work, moved);
+ trace_writeback_queue_io(wb, work, dirtied_before, moved);
}
static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -1268,7 +1284,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* writeback is not making progress due to locked
* buffers. Skip this inode for now.
*/
- redirty_tail(inode, wb);
+ redirty_tail_locked(inode, wb);
return;
}
@@ -1288,7 +1304,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* retrying writeback of the dirty page/inode
* that cannot be performed immediately.
*/
- redirty_tail(inode, wb);
+ redirty_tail_locked(inode, wb);
}
} else if (inode->i_state & I_DIRTY) {
/*
@@ -1296,10 +1312,11 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* such as delayed allocation during submission or metadata
* updates after data IO completion.
*/
- redirty_tail(inode, wb);
+ redirty_tail_locked(inode, wb);
} else if (inode->i_state & I_DIRTY_TIME) {
inode->dirtied_when = jiffies;
inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
+ inode->i_state &= ~I_SYNC_QUEUED;
} else {
/* The inode is clean. Remove from writeback lists. */
inode_io_list_del_locked(inode, wb);
@@ -1542,8 +1559,8 @@ static long writeback_sb_inodes(struct super_block *sb,
*/
spin_lock(&inode->i_lock);
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+ redirty_tail_locked(inode, wb);
spin_unlock(&inode->i_lock);
- redirty_tail(inode, wb);
continue;
}
if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
@@ -1684,7 +1701,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
blk_start_plug(&plug);
spin_lock(&wb->list_lock);
if (list_empty(&wb->b_io))
- queue_io(wb, &work);
+ queue_io(wb, &work, jiffies);
__writeback_inodes_wb(wb, &work);
spin_unlock(&wb->list_lock);
blk_finish_plug(&plug);
@@ -1704,7 +1721,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
* takes longer than a dirty_writeback_interval interval, then leave a
* one-second gap.
*
- * older_than_this takes precedence over nr_to_write. So we'll only write back
+ * dirtied_before takes precedence over nr_to_write. So we'll only write back
* all dirty pages if they are all attached to "old" mappings.
*/
static long wb_writeback(struct bdi_writeback *wb,
@@ -1712,14 +1729,11 @@ static long wb_writeback(struct bdi_writeback *wb,
{
unsigned long wb_start = jiffies;
long nr_pages = work->nr_pages;
- unsigned long oldest_jif;
+ unsigned long dirtied_before = jiffies;
struct inode *inode;
long progress;
struct blk_plug plug;
- oldest_jif = jiffies;
- work->older_than_this = &oldest_jif;
-
blk_start_plug(&plug);
spin_lock(&wb->list_lock);
for (;;) {
@@ -1753,14 +1767,14 @@ static long wb_writeback(struct bdi_writeback *wb,
* safe.
*/
if (work->for_kupdate) {
- oldest_jif = jiffies -
+ dirtied_before = jiffies -
msecs_to_jiffies(dirty_expire_interval * 10);
} else if (work->for_background)
- oldest_jif = jiffies;
+ dirtied_before = jiffies;
trace_writeback_start(wb, work);
if (list_empty(&wb->b_io))
- queue_io(wb, work);
+ queue_io(wb, work, dirtied_before);
if (work->sb)
progress = writeback_sb_inodes(work->sb, wb, work);
else
@@ -1920,7 +1934,7 @@ void wb_workfn(struct work_struct *work)
struct bdi_writeback, dwork);
long pages_written;
- set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
+ set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
current->flags |= PF_SWAPWRITE;
if (likely(!current_is_workqueue_rescuer() ||
@@ -2031,28 +2045,6 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
return ret;
}
-static noinline void block_dump___mark_inode_dirty(struct inode *inode)
-{
- if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
- struct dentry *dentry;
- const char *name = "?";
-
- dentry = d_find_alias(inode);
- if (dentry) {
- spin_lock(&dentry->d_lock);
- name = (const char *) dentry->d_name.name;
- }
- printk(KERN_DEBUG
- "%s(%d): dirtied inode %lu (%s) on %s\n",
- current->comm, task_pid_nr(current), inode->i_ino,
- name, inode->i_sb->s_id);
- if (dentry) {
- spin_unlock(&dentry->d_lock);
- dput(dentry);
- }
- }
-}
-
/**
* __mark_inode_dirty - internal function
* @inode: inode to mark
@@ -2111,9 +2103,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
(dirtytime && (inode->i_state & I_DIRTY_INODE)))
return;
- if (unlikely(block_dump))
- block_dump___mark_inode_dirty(inode);
-
spin_lock(&inode->i_lock);
if (dirtytime && (inode->i_state & I_DIRTY_INODE))
goto out_unlock_inode;
@@ -2127,11 +2116,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
inode->i_state |= flags;
/*
- * If the inode is being synced, just update its dirty state.
- * The unlocker will place the inode on the appropriate
- * superblock list, based upon its state.
+ * If the inode is queued for writeback by flush worker, just
+ * update its dirty state. Once the flush worker is done with
+ * the inode it will place it on the appropriate superblock
+ * list, based upon its state.
*/
- if (inode->i_state & I_SYNC)
+ if (inode->i_state & I_SYNC_QUEUED)
goto out_unlock_inode;
/*
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index d9aba9700726..b83367300f48 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -616,6 +616,8 @@ static int __init cuse_init(void)
cuse_channel_fops.owner = THIS_MODULE;
cuse_channel_fops.open = cuse_channel_open;
cuse_channel_fops.release = cuse_channel_release;
+ /* CUSE is not prepared for FUSE_DEV_IOC_CLONE */
+ cuse_channel_fops.unlocked_ioctl = NULL;
cuse_class = class_create(THIS_MODULE, "cuse");
if (IS_ERR(cuse_class))
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 16891f5364af..38a12b0e395f 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -145,9 +145,13 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
static void fuse_drop_waiting(struct fuse_conn *fc)
{
- if (fc->connected) {
- atomic_dec(&fc->num_waiting);
- } else if (atomic_dec_and_test(&fc->num_waiting)) {
+ /*
+ * lockless check of fc->connected is okay, because atomic_dec_and_test()
+ * provides a memory barrier matched with the one in fuse_wait_aborted()
+ * to ensure no wake-up is missed.
+ */
+ if (atomic_dec_and_test(&fc->num_waiting) &&
+ !READ_ONCE(fc->connected)) {
/* wake up aborters */
wake_up_all(&fc->blocked_waitq);
}
@@ -846,7 +850,6 @@ static int fuse_check_page(struct page *page)
{
if (page_mapcount(page) ||
page->mapping != NULL ||
- page_count(page) != 1 ||
(page->flags & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
@@ -1312,6 +1315,15 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
goto restart;
}
spin_lock(&fpq->lock);
+ /*
+ * Must not put request on fpq->io queue after having been shut down by
+ * fuse_abort_conn()
+ */
+ if (!fpq->connected) {
+ req->out.h.error = err = -ECONNABORTED;
+ goto out_end;
+
+ }
list_add(&req->list, &fpq->io);
spin_unlock(&fpq->lock);
cs->req = req;
@@ -1919,7 +1931,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
}
err = -EINVAL;
- if (oh.error <= -1000 || oh.error > 0)
+ if (oh.error <= -512 || oh.error > 0)
goto err_finish;
spin_lock(&fpq->lock);
@@ -2222,6 +2234,8 @@ EXPORT_SYMBOL_GPL(fuse_abort_conn);
void fuse_wait_aborted(struct fuse_conn *fc)
{
+ /* matches implicit memory barrier in fuse_drop_waiting() */
+ smp_mb();
wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6ce6754168e0..f7d025d1684c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -17,6 +17,7 @@
#include <linux/swap.h>
#include <linux/falloc.h>
#include <linux/uio.h>
+#include <linux/fs.h>
static const struct file_operations fuse_direct_io_file_operations;
@@ -2517,7 +2518,16 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
struct iovec *iov = iov_page;
iov->iov_base = (void __user *)arg;
- iov->iov_len = _IOC_SIZE(cmd);
+
+ switch (cmd) {
+ case FS_IOC_GETFLAGS:
+ case FS_IOC_SETFLAGS:
+ iov->iov_len = sizeof(int);
+ break;
+ default:
+ iov->iov_len = _IOC_SIZE(cmd);
+ break;
+ }
if (_IOC_DIR(cmd) & _IOC_WRITE) {
in_iov = iov;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 8744bd773823..dec23fb358ec 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1035,7 +1035,10 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
if (fl_gh->gh_state == state)
goto out;
locks_lock_file_wait(file,
- &(struct file_lock){.fl_type = F_UNLCK});
+ &(struct file_lock) {
+ .fl_type = F_UNLCK,
+ .fl_flags = FL_FLOCK
+ });
gfs2_glock_dq(fl_gh);
gfs2_holder_reinit(state, flags, fl_gh);
} else {
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f80ffccb0316..f115ce93dfb4 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -541,9 +541,6 @@ __acquires(&gl->gl_lockref.lock)
goto out_unlock;
if (nonblock)
goto out_sched;
- smp_mb();
- if (atomic_read(&gl->gl_revokes) != 0)
- goto out_sched;
set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
gl->gl_target = gl->gl_demote_state;
@@ -754,7 +751,8 @@ again:
}
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(cachep, gl);
- atomic_dec(&sdp->sd_glock_disposal);
+ if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+ wake_up(&sdp->sd_glock_wait);
*glp = tmp;
return ret;
@@ -1344,6 +1342,7 @@ __acquires(&lru_lock)
while(!list_empty(list)) {
gl = list_entry(list->next, struct gfs2_glock, gl_lru);
list_del_init(&gl->gl_lru);
+ clear_bit(GLF_LRU, &gl->gl_flags);
if (!spin_trylock(&gl->gl_lockref.lock)) {
add_back_to_lru:
list_add(&gl->gl_lru, &lru_list);
@@ -1390,7 +1389,6 @@ static long gfs2_scan_glock_lru(int nr)
if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
list_move(&gl->gl_lru, &dispose);
atomic_dec(&lru_count);
- clear_bit(GLF_LRU, &gl->gl_flags);
freed++;
continue;
}
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 3c3d037df824..da9f97911852 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -284,7 +284,6 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- int lvb_needs_unlock = 0;
int error;
if (gl->gl_lksb.sb_lkid == 0) {
@@ -297,13 +296,15 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_update_request_times(gl);
- /* don't want to skip dlm_unlock writing the lvb when lock is ex */
-
- if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
- lvb_needs_unlock = 1;
+ /* don't want to call dlm if we've unmounted the lock protocol */
+ if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
+ gfs2_glock_free(gl);
+ return;
+ }
+ /* don't want to skip dlm_unlock writing the lvb when lock has one */
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
- !lvb_needs_unlock) {
+ !gl->gl_lksb.sb_lvbptr) {
gfs2_glock_free(gl);
return;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index de7143e2b361..8ed2b1a71637 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -160,15 +160,19 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
return -EINVAL;
}
- /* If format numbers match exactly, we're done. */
-
- if (sb->sb_fs_format == GFS2_FORMAT_FS &&
- sb->sb_multihost_format == GFS2_FORMAT_MULTI)
- return 0;
+ if (sb->sb_fs_format != GFS2_FORMAT_FS ||
+ sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
+ fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+ return -EINVAL;
+ }
- fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+ if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE ||
+ (sb->sb_bsize & (sb->sb_bsize - 1))) {
+ pr_warn("Invalid superblock size\n");
+ return -EINVAL;
+ }
- return -EINVAL;
+ return 0;
}
static void end_bio_io_page(struct bio *bio)
@@ -916,7 +920,7 @@ fail:
}
static const match_table_t nolock_tokens = {
- { Opt_jid, "jid=%d\n", },
+ { Opt_jid, "jid=%d", },
{ Opt_err, NULL },
};
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 2736e9cfc2ee..dd0d8c1bf5c5 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -747,9 +747,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
}
gfs2_free_clones(rgd);
+ return_all_reservations(rgd);
kfree(rgd->rd_bits);
rgd->rd_bits = NULL;
- return_all_reservations(rgd);
kmem_cache_free(gfs2_rgrpd_cachep, rgd);
}
}
@@ -1017,6 +1017,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
if (error < 0)
return error;
+ if (RB_EMPTY_ROOT(&sdp->sd_rindex_tree)) {
+ fs_err(sdp, "no resource groups found in the file system.\n");
+ return -ENOENT;
+ }
set_rgrp_preferences(sdp);
sdp->sd_rindex_uptodate = 1;
@@ -1388,6 +1392,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+ return -EROFS;
+
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c
index de69d8a24f6d..7f2ef95dcd05 100644
--- a/fs/hfs/bfind.c
+++ b/fs/hfs/bfind.c
@@ -24,7 +24,19 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
fd->key = ptr + tree->max_key_len + 2;
hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n",
tree->cnid, __builtin_return_address(0));
- mutex_lock(&tree->tree_lock);
+ switch (tree->cnid) {
+ case HFS_CAT_CNID:
+ mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX);
+ break;
+ case HFS_EXT_CNID:
+ mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX);
+ break;
+ case HFS_ATTR_CNID:
+ mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX);
+ break;
+ default:
+ return -EINVAL;
+ }
return 0;
}
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 221719eac5de..2cda99e61cae 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -14,16 +14,31 @@
#include "btree.h"
-void hfs_bnode_read(struct hfs_bnode *node, void *buf,
- int off, int len)
+void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
{
struct page *page;
+ int pagenum;
+ int bytes_read;
+ int bytes_to_read;
+ void *vaddr;
off += node->page_offset;
- page = node->page[0];
+ pagenum = off >> PAGE_SHIFT;
+ off &= ~PAGE_MASK; /* compute page offset for the first page */
- memcpy(buf, kmap(page) + off, len);
- kunmap(page);
+ for (bytes_read = 0; bytes_read < len; bytes_read += bytes_to_read) {
+ if (pagenum >= node->tree->pages_per_bnode)
+ break;
+ page = node->page[pagenum];
+ bytes_to_read = min_t(int, len - bytes_read, PAGE_SIZE - off);
+
+ vaddr = kmap_atomic(page);
+ memcpy(buf + bytes_read, vaddr + off, bytes_to_read);
+ kunmap_atomic(vaddr);
+
+ pagenum++;
+ off = 0; /* page offset only applies to the first page */
+ }
}
u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off)
diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h
index 2715f416b5a8..308b5f1af65b 100644
--- a/fs/hfs/btree.h
+++ b/fs/hfs/btree.h
@@ -12,6 +12,13 @@ typedef int (*btree_keycmp)(const btree_key *, const btree_key *);
#define NODE_HASH_SIZE 256
+/* B-tree mutex nested subclasses */
+enum hfs_btree_mutex_classes {
+ CATALOG_BTREE_MUTEX,
+ EXTENTS_BTREE_MUTEX,
+ ATTR_BTREE_MUTEX,
+};
+
/* A HFS BTree held in memory */
struct hfs_btree {
struct super_block *sb;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4574fdd3d421..3eb815bb2c78 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -426,14 +426,12 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
if (!res) {
if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) {
res = -EIO;
- goto bail;
+ goto bail_hfs_find;
}
hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength);
}
- if (res) {
- hfs_find_exit(&fd);
- goto bail_no_root;
- }
+ if (res)
+ goto bail_hfs_find;
res = -EINVAL;
root_inode = hfs_iget(sb, &fd.search_key->cat, &rec);
hfs_find_exit(&fd);
@@ -449,6 +447,8 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
/* everything's okay */
return 0;
+bail_hfs_find:
+ hfs_find_exit(&fd);
bail_no_root:
pr_err("get root inode failed\n");
bail:
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 937c6ee1786f..1d5e3b0a3b1a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -414,7 +414,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
if (next >= end)
break;
- hash = hugetlb_fault_mutex_hash(h, mapping, next, 0);
+ hash = hugetlb_fault_mutex_hash(h, mapping, next);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
lock_page(page);
@@ -630,7 +630,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
addr = index * hpage_size;
/* mutex taken here, fault path and hole punch */
- hash = hugetlb_fault_mutex_hash(h, mapping, index, addr);
+ hash = hugetlb_fault_mutex_hash(h, mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
@@ -661,8 +661,9 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ set_page_huge_active(page);
/*
- * page_put due to reference from alloc_huge_page()
+ * put_page() due to reference from alloc_huge_page()
* unlock_page because locked by add_to_page_cache()
*/
put_page(page);
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index b943cbd963bb..2e7d74c7beed 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -151,6 +151,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *file,
printk(KERN_NOTICE "iso9660: Corrupted directory entry"
" in block %lu of inode %lu\n", block,
inode->i_ino);
+ brelse(bh);
return -EIO;
}
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 7b543e6b6526..696f255d1532 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -101,6 +101,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
printk(KERN_NOTICE "iso9660: Corrupted directory entry"
" in block %lu of inode %lu\n", block,
dir->i_ino);
+ brelse(bh);
return 0;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 3233e5ac9774..ce2bf9d74224 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1906,6 +1906,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
*/
static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
+ J_ASSERT_JH(jh, jh->b_transaction != NULL);
+ J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
jbd2_journal_put_journal_head(jh);
@@ -1997,6 +2000,7 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
{
struct buffer_head *head;
struct buffer_head *bh;
+ bool has_write_io_error = false;
int ret = 0;
J_ASSERT(PageLocked(page));
@@ -2021,11 +2025,26 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
jbd_unlock_bh_state(bh);
if (buffer_jbd(bh))
goto busy;
+
+ /*
+ * If we free a metadata buffer which has been failed to
+ * write out, the jbd2 checkpoint procedure will not detect
+ * this failure and may lead to filesystem inconsistency
+ * after cleanup journal tail.
+ */
+ if (buffer_write_io_error(bh)) {
+ pr_err("JBD2: Error while async write back metadata bh %llu.",
+ (unsigned long long)bh->b_blocknr);
+ has_write_io_error = true;
+ }
} while ((bh = bh->b_this_page) != head);
ret = try_to_free_buffers(page);
busy:
+ if (has_write_io_error)
+ jbd2_journal_abort(journal, -EIO);
+
return ret;
}
@@ -2453,6 +2472,13 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
was_dirty = test_clear_buffer_jbddirty(bh);
__jbd2_journal_temp_unlink_buffer(jh);
+
+ /*
+ * b_transaction must be set, otherwise the new b_transaction won't
+ * be holding jh reference
+ */
+ J_ASSERT_JH(jh, jh->b_transaction != NULL);
+
/*
* We set b_transaction here because b_next_transaction will inherit
* our jh reference and thus __jbd2_journal_file_buffer() must not
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 406d9cc84ba8..79e771ab624f 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -37,6 +37,9 @@ static int jffs2_rtime_compress(unsigned char *data_in,
int outpos = 0;
int pos=0;
+ if (*dstlen <= 3)
+ return -1;
+
memset(positions,0,sizeof(positions));
while (pos < (*sourcelen) && outpos <= (*dstlen)-2) {
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index e27317169697..7a3368929245 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -588,10 +588,14 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
int ret;
uint32_t now = get_seconds();
+ mutex_lock(&f->sem);
for (fd = f->dents ; fd; fd = fd->next) {
- if (fd->ino)
+ if (fd->ino) {
+ mutex_unlock(&f->sem);
return -ENOTEMPTY;
+ }
}
+ mutex_unlock(&f->sem);
ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
dentry->d_name.len, f, now);
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 5b52ea41b84f..bee8964682f8 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -672,6 +672,22 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
jffs2_free_full_dirent(fd);
return -EIO;
}
+
+#ifdef CONFIG_JFFS2_SUMMARY
+ /*
+ * we use CONFIG_JFFS2_SUMMARY because without it, we
+ * have checked it while mounting
+ */
+ crc = crc32(0, fd->name, rd->nsize);
+ if (unlikely(crc != je32_to_cpu(rd->name_crc))) {
+ JFFS2_NOTICE("name CRC failed on dirent node at"
+ "%#08x: read %#08x,calculated %#08x\n",
+ ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
+ jffs2_mark_node_obsolete(c, ref);
+ jffs2_free_full_dirent(fd);
+ return 0;
+ }
+#endif
}
fd->nhash = full_name_hash(fd->name, rd->nsize);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 9ad5ba4b299b..5f90173ae38d 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -1075,7 +1075,7 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo
memcpy(&fd->name, rd->name, checkedlen);
fd->name[checkedlen] = 0;
- crc = crc32(0, fd->name, rd->nsize);
+ crc = crc32(0, fd->name, checkedlen);
if (crc != je32_to_cpu(rd->name_crc)) {
pr_notice("%s(): Name CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
__func__, ofs, je32_to_cpu(rd->name_crc), crc);
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index bc5385471a6e..c05d6f5f10ec 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -783,6 +783,8 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
dbg_summary("Writing unknown RWCOMPAT_COPY node type %x\n",
je16_to_cpu(temp->u.nodetype));
jffs2_sum_disable_collecting(c->summary);
+ /* The above call removes the list, nothing more to do */
+ goto bail_rwcompat;
} else {
BUG(); /* unknown node in summary information */
}
@@ -794,6 +796,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
c->summary->sum_num--;
}
+ bail_rwcompat:
jffs2_sum_reset_collected(c->summary);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 41aa3ca6a6a4..b318732a8562 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -160,7 +160,8 @@ void jfs_evict_inode(struct inode *inode)
if (test_cflag(COMMIT_Freewmap, inode))
jfs_free_zero_link(inode);
- diFree(inode);
+ if (JFS_SBI(inode->i_sb)->ipimap)
+ diFree(inode);
/*
* Free the inode from the quota allocation.
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 2d514c7affc2..9ff510a489cb 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -1669,7 +1669,7 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
} else if (rc == -ENOSPC) {
/* search for next smaller log2 block */
l2nb = BLKSTOL2(nblocks) - 1;
- nblocks = 1 << l2nb;
+ nblocks = 1LL << l2nb;
} else {
/* Trim any already allocated blocks */
jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n");
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 562b9a7e4311..f502a15c6c98 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -196,7 +196,7 @@ typedef union dmtree {
#define dmt_leafidx t1.leafidx
#define dmt_height t1.height
#define dmt_budmin t1.budmin
-#define dmt_stree t1.stree
+#define dmt_stree t2.stree
/*
* on-disk aggregate disk allocation map descriptor.
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index b67d64671bb4..415bfa90607a 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -281,5 +281,6 @@
* fsck() must be run to repair
*/
#define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */
+#define FM_STATE_MAX 0x0000000f /* max value of s_state */
#endif /* _H_JFS_FILSYS */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index a69bdf2a1085..d19542a88c2c 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1339,6 +1339,7 @@ int lmLogInit(struct jfs_log * log)
} else {
if (memcmp(logsuper->uuid, log->uuid, 16)) {
jfs_warn("wrong uuid on JFS log device");
+ rc = -EINVAL;
goto errout20;
}
log->size = le32_to_cpu(logsuper->size);
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index 9895595fd2f2..103788ecc28c 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -49,6 +49,7 @@
#include <linux/fs.h>
#include <linux/buffer_head.h>
+#include <linux/log2.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
@@ -378,6 +379,15 @@ static int chkSuper(struct super_block *sb)
sbi->bsize = bsize;
sbi->l2bsize = le16_to_cpu(j_sb->s_l2bsize);
+ /* check some fields for possible corruption */
+ if (sbi->l2bsize != ilog2((u32)bsize) ||
+ j_sb->pad != 0 ||
+ le32_to_cpu(j_sb->s_state) > FM_STATE_MAX) {
+ rc = -EINVAL;
+ jfs_err("jfs_mount: Mount Failure: superblock is corrupt!");
+ goto out;
+ }
+
/*
* For now, ignore s_pbsize, l2bfactor. All I/O going through buffer
* cache.
diff --git a/fs/libfs.c b/fs/libfs.c
index a33e95f8729b..01e9cae5b160 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -827,7 +827,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
struct simple_attr *attr;
- u64 val;
+ unsigned long long val;
size_t size;
ssize_t ret;
@@ -845,7 +845,9 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
goto out;
attr->set_buf[size] = '\0';
- val = simple_strtoll(attr->set_buf, NULL, 0);
+ ret = kstrtoull(attr->set_buf, 0, &val);
+ if (ret)
+ goto out;
ret = attr->set(attr->data, val);
if (ret == 0)
ret = len; /* on success, claim we got the whole input */
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index c7eb47f2fb6c..603fa652b965 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -430,12 +430,7 @@ nlm_bind_host(struct nlm_host *host)
* RPC rebind is required
*/
if ((clnt = host->h_rpcclnt) != NULL) {
- if (time_after_eq(jiffies, host->h_nextrebind)) {
- rpc_force_rebind(clnt);
- host->h_nextrebind = jiffies + NLM_HOST_REBIND;
- dprintk("lockd: next rebind in %lu jiffies\n",
- host->h_nextrebind - jiffies);
- }
+ nlm_rebind_host(host);
} else {
unsigned long increment = nlmsvc_timeout;
struct rpc_timeout timeparms = {
@@ -483,13 +478,20 @@ nlm_bind_host(struct nlm_host *host)
return clnt;
}
-/*
- * Force a portmap lookup of the remote lockd port
+/**
+ * nlm_rebind_host - If needed, force a portmap lookup of the peer's lockd port
+ * @host: NLM host handle for peer
+ *
+ * This is not needed when using a connection-oriented protocol, such as TCP.
+ * The existing autobind mechanism is sufficient to force a rebind when
+ * required, e.g. on connection state transitions.
*/
void
nlm_rebind_host(struct nlm_host *host)
{
- dprintk("lockd: rebind host %s\n", host->h_name);
+ if (host->h_proto != IPPROTO_UDP)
+ return;
+
if (host->h_rpcclnt && time_after_eq(jiffies, host->h_nextrebind)) {
rpc_force_rebind(host->h_rpcclnt);
host->h_nextrebind = jiffies + NLM_HOST_REBIND;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 086cd0a61e80..8d9bc0344cf3 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -155,6 +155,23 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)
return 0;
}
+static bool minix_check_superblock(struct minix_sb_info *sbi)
+{
+ if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
+ return false;
+
+ /*
+ * s_max_size must not exceed the block mapping limitation. This check
+ * is only needed for V1 filesystems, since V2/V3 support an extra level
+ * of indirect blocks which places the limit well above U32_MAX.
+ */
+ if (sbi->s_version == MINIX_V1 &&
+ sbi->s_max_size > (7 + 512 + 512*512) * BLOCK_SIZE)
+ return false;
+
+ return true;
+}
+
static int minix_fill_super(struct super_block *s, void *data, int silent)
{
struct buffer_head *bh;
@@ -233,11 +250,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
} else
goto out_no_fs;
+ if (!minix_check_superblock(sbi))
+ goto out_illegal_sb;
+
/*
* Allocate the buffer map to keep the superblock small.
*/
- if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
- goto out_illegal_sb;
i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
map = kzalloc(i, GFP_KERNEL);
if (!map)
@@ -472,6 +490,13 @@ static struct inode *V1_minix_iget(struct inode *inode)
iget_failed(inode);
return ERR_PTR(-EIO);
}
+ if (raw_inode->i_nlinks == 0) {
+ printk("MINIX-fs: deleted inode referenced: %lu\n",
+ inode->i_ino);
+ brelse(bh);
+ iget_failed(inode);
+ return ERR_PTR(-ESTALE);
+ }
inode->i_mode = raw_inode->i_mode;
i_uid_write(inode, raw_inode->i_uid);
i_gid_write(inode, raw_inode->i_gid);
@@ -505,6 +530,13 @@ static struct inode *V2_minix_iget(struct inode *inode)
iget_failed(inode);
return ERR_PTR(-EIO);
}
+ if (raw_inode->i_nlinks == 0) {
+ printk("MINIX-fs: deleted inode referenced: %lu\n",
+ inode->i_ino);
+ brelse(bh);
+ iget_failed(inode);
+ return ERR_PTR(-ESTALE);
+ }
inode->i_mode = raw_inode->i_mode;
i_uid_write(inode, raw_inode->i_uid);
i_gid_write(inode, raw_inode->i_gid);
diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c
index a731cabf1540..3816427e8938 100644
--- a/fs/minix/itree_common.c
+++ b/fs/minix/itree_common.c
@@ -74,6 +74,7 @@ static int alloc_branch(struct inode *inode,
int n = 0;
int i;
int parent = minix_new_block(inode);
+ int err = -ENOSPC;
branch[0].key = cpu_to_block(parent);
if (parent) for (n = 1; n < num; n++) {
@@ -84,6 +85,11 @@ static int alloc_branch(struct inode *inode,
break;
branch[n].key = cpu_to_block(nr);
bh = sb_getblk(inode->i_sb, parent);
+ if (!bh) {
+ minix_free_block(inode, nr);
+ err = -ENOMEM;
+ break;
+ }
lock_buffer(bh);
memset(bh->b_data, 0, bh->b_size);
branch[n].bh = bh;
@@ -102,7 +108,7 @@ static int alloc_branch(struct inode *inode,
bforget(branch[i].bh);
for (i = 0; i < n; i++)
minix_free_block(inode, block_to_cpu(branch[i].key));
- return -ENOSPC;
+ return err;
}
static inline int splice_branch(struct inode *inode,
diff --git a/fs/namespace.c b/fs/namespace.c
index 88c5d5bddf74..ddd9d47059ee 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1830,6 +1830,20 @@ void drop_collected_mounts(struct vfsmount *mnt)
namespace_unlock();
}
+static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+{
+ struct mount *child;
+
+ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+ if (!is_subdir(child->mnt_mountpoint, dentry))
+ continue;
+
+ if (child->mnt.mnt_flags & MNT_LOCKED)
+ return true;
+ }
+ return false;
+}
+
/**
* clone_private_mount - create a private clone of a path
*
@@ -1844,16 +1858,27 @@ struct vfsmount *clone_private_mount(struct path *path)
struct mount *old_mnt = real_mount(path->mnt);
struct mount *new_mnt;
+ down_read(&namespace_sem);
if (IS_MNT_UNBINDABLE(old_mnt))
- return ERR_PTR(-EINVAL);
+ goto invalid;
+
+ if (!check_mnt(old_mnt))
+ goto invalid;
+
+ if (has_locked_children(old_mnt, path->dentry))
+ goto invalid;
- down_read(&namespace_sem);
new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
up_read(&namespace_sem);
+
if (IS_ERR(new_mnt))
return ERR_CAST(new_mnt);
return &new_mnt->mnt;
+
+invalid:
+ up_read(&namespace_sem);
+ return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(clone_private_mount);
@@ -2169,19 +2194,6 @@ static int do_change_type(struct path *path, int flag)
return err;
}
-static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
-{
- struct mount *child;
- list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
- if (!is_subdir(child->mnt_mountpoint, dentry))
- continue;
-
- if (child->mnt.mnt_flags & MNT_LOCKED)
- return true;
- }
- return false;
-}
-
/*
* do loopback mount.
*/
@@ -3161,8 +3173,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* make certain new is below the root */
if (!is_path_reachable(new_mnt, new.dentry, &root))
goto out4;
- root_mp->m_count++; /* pin it so it won't go away */
lock_mount_hash();
+ root_mp->m_count++; /* pin it so it won't go away */
detach_mnt(new_mnt, &parent_path);
detach_mnt(root_mnt, &root_parent);
if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index c3428767332c..55ebf9f4a824 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -132,7 +132,7 @@ config PNFS_OBJLAYOUT
config PNFS_FLEXFILE_LAYOUT
tristate
depends on NFS_V4_1 && NFS_V3
- default m
+ default NFS_V4
config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
string "NFSv4.1 Implementation ID Domain"
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index d6d5d2a48e83..ba2cd0bd3894 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -377,7 +377,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
if (cl_init->hostname == NULL) {
WARN_ON(1);
- return NULL;
+ return ERR_PTR(-EINVAL);
}
dprintk("--> nfs_get_client(%s,v%u)\n",
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 21e5fcbcb227..ba7e98d8ce09 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -562,6 +562,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
do {
+ if (entry->label)
+ entry->label->len = NFS4_MAXLABELLEN;
+
status = xdr_decode(desc, entry, &stream);
if (status != 0) {
if (status == -EAGAIN)
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 8e268965c96d..3f1ea498ecab 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -716,7 +716,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
if (unlikely(!p))
goto out_err;
fl->fh_array[i]->size = be32_to_cpup(p++);
- if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
+ if (fl->fh_array[i]->size > NFS_MAXFHSIZE) {
printk(KERN_ERR "NFS: Too big fh %d received %d\n",
i, fl->fh_array[i]->size);
goto out_err;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 6506775575aa..e7f8732895b7 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -86,7 +86,7 @@ static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
if (unlikely(!p))
return -ENOBUFS;
fh->size = be32_to_cpup(p++);
- if (fh->size > sizeof(struct nfs_fh)) {
+ if (fh->size > NFS_MAXFHSIZE) {
printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
fh->size);
return -EOVERFLOW;
@@ -855,9 +855,8 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
goto out_mds;
/* Use a direct mapping of ds_idx to pgio mirror_idx */
- if (WARN_ON_ONCE(pgio->pg_mirror_count !=
- FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)))
- goto out_mds;
+ if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))
+ goto out_eagain;
for (i = 0; i < pgio->pg_mirror_count; i++) {
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
@@ -869,11 +868,15 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
}
return;
-
+out_eagain:
+ pnfs_generic_pg_cleanup(pgio);
+ pgio->pg_error = -EAGAIN;
+ return;
out_mds:
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
nfs_pageio_reset_write_mds(pgio);
+ pgio->pg_error = -EAGAIN;
}
static unsigned int
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d25b55ceb9d5..0d7b8c6e1de8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1430,10 +1430,10 @@ EXPORT_SYMBOL_GPL(_nfs_display_fhandle);
*/
static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
{
- const struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long attr_gencount = NFS_I(inode)->attr_gencount;
- return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
- ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
+ return (long)(fattr->gencount - attr_gencount) > 0 ||
+ (long)(attr_gencount - nfs_read_attr_generation_counter()) > 0;
}
/*
@@ -1849,7 +1849,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->attrtimeo_timestamp = now;
}
/* Set the barrier to be more recent than this fattr */
- if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
+ if ((long)(fattr->gencount - nfsi->attr_gencount) > 0)
nfsi->attr_gencount = fattr->gencount;
}
@@ -1964,7 +1964,7 @@ static int nfsiod_start(void)
{
struct workqueue_struct *wq;
dprintk("RPC: creating workqueue nfsiod\n");
- wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
+ wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (wq == NULL)
return -ENOMEM;
nfsiod_workqueue = wq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 578350fd96e1..7eeab683a81f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -534,12 +534,14 @@ extern int nfs41_walk_client_list(struct nfs_client *clp,
static inline struct inode *nfs_igrab_and_active(struct inode *inode)
{
- inode = igrab(inode);
- if (inode != NULL && !nfs_sb_active(inode->i_sb)) {
- iput(inode);
- inode = NULL;
+ struct super_block *sb = inode->i_sb;
+
+ if (sb && nfs_sb_active(sb)) {
+ if (igrab(inode))
+ return inode;
+ nfs_sb_deactive(sb);
}
- return inode;
+ return NULL;
}
static inline void nfs_iput_and_deactive(struct inode *inode)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index c8162c660c44..d29ad4e02d33 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -30,9 +30,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
/*
* nfs_path - reconstruct the path given an arbitrary dentry
* @base - used to return pointer to the end of devname part of path
- * @dentry - pointer to dentry
+ * @dentry_in - pointer to dentry
* @buffer - result buffer
- * @buflen - length of buffer
+ * @buflen_in - length of buffer
* @flags - options (see below)
*
* Helper function for constructing the server pathname
@@ -47,15 +47,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
* the original device (export) name
* (if unset, the original name is returned verbatim)
*/
-char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen,
- unsigned flags)
+char *nfs_path(char **p, struct dentry *dentry_in, char *buffer,
+ ssize_t buflen_in, unsigned flags)
{
char *end;
int namelen;
unsigned seq;
const char *base;
+ struct dentry *dentry;
+ ssize_t buflen;
rename_retry:
+ buflen = buflen_in;
+ dentry = dentry_in;
end = buffer+buflen;
*--end = '\0';
buflen--;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 1ebe2fc7cda2..05c697d5b477 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -213,37 +213,45 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
- struct posix_acl *alloc = NULL, *dfacl = NULL;
+ struct posix_acl *orig = acl, *dfacl = NULL, *alloc;
int status;
if (S_ISDIR(inode->i_mode)) {
switch(type) {
case ACL_TYPE_ACCESS:
- alloc = dfacl = get_acl(inode, ACL_TYPE_DEFAULT);
+ alloc = get_acl(inode, ACL_TYPE_DEFAULT);
if (IS_ERR(alloc))
goto fail;
+ dfacl = alloc;
break;
case ACL_TYPE_DEFAULT:
- dfacl = acl;
- alloc = acl = get_acl(inode, ACL_TYPE_ACCESS);
+ alloc = get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(alloc))
goto fail;
+ dfacl = acl;
+ acl = alloc;
break;
}
}
if (acl == NULL) {
- alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+ alloc = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
if (IS_ERR(alloc))
goto fail;
+ acl = alloc;
}
status = __nfs3_proc_setacls(inode, acl, dfacl);
- posix_acl_release(alloc);
+out:
+ if (acl != orig)
+ posix_acl_release(acl);
+ if (dfacl != orig)
+ posix_acl_release(dfacl);
return status;
fail:
- return PTR_ERR(alloc);
+ status = PTR_ERR(alloc);
+ goto out;
}
const struct xattr_handler *nfs3_xattr_handlers[] = {
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index cb28cceefebe..9f365b004453 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -363,7 +363,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
break;
case NFS3_CREATE_UNCHECKED:
- goto out;
+ goto out_release_acls;
}
nfs_fattr_init(data->res.dir_attr);
nfs_fattr_init(data->res.fattr);
@@ -708,7 +708,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
break;
default:
status = -EINVAL;
- goto out;
+ goto out_release_acls;
}
status = nfs3_do_create(dir, dentry, data);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 267126d32ec0..4a68837e92ea 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -33,6 +33,7 @@
*/
#define NFS3_fhandle_sz (1+16)
#define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */
+#define NFS3_post_op_fh_sz (1+NFS3_fh_sz)
#define NFS3_sattr_sz (15)
#define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2))
#define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2))
@@ -70,7 +71,7 @@
#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
#define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
-#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
+#define NFS3_createres_sz (1+NFS3_post_op_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
#define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 7f1a0fb8c493..31cc6f3d992d 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -168,7 +168,10 @@ static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
if (status)
return status;
- return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
+ if (whence == SEEK_DATA && res.sr_eof)
+ return -NFS4ERR_NXIO;
+ else
+ return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
}
loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index c5e884585c23..9b9c8e598436 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -168,7 +168,7 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
case SEEK_HOLE:
case SEEK_DATA:
ret = nfs42_proc_llseek(filep, offset, whence);
- if (ret != -ENOTSUPP)
+ if (ret != -EOPNOTSUPP)
return ret;
default:
return nfs_file_llseek(filep, offset, whence);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 08207001d475..e10bada12361 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4047,12 +4047,12 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page **pages, unsigned int count, int plus)
{
struct inode *dir = d_inode(dentry);
+ struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_readdir_arg args = {
.fh = NFS_FH(dir),
.pages = pages,
.pgbase = 0,
.count = count,
- .bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask,
.plus = plus,
};
struct nfs4_readdir_res res;
@@ -4067,9 +4067,15 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__,
dentry,
(unsigned long long)cookie);
+ if (!(server->caps & NFS_CAP_SECURITY_LABEL))
+ args.bitmask = server->attr_bitmask_nl;
+ else
+ args.bitmask = server->attr_bitmask;
+
nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
res.pgbase = args.pgbase;
- status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
+ status = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
+ &res.seq_res, 0);
if (status >= 0) {
memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
status += args.pgbase;
@@ -4842,6 +4848,9 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
int ret, i;
+ /* You can't remove system.nfs4_acl: */
+ if (buflen == 0)
+ return -EINVAL;
if (!nfs4_server_supports_acls(server))
return -EOPNOTSUPP;
if (npages > ARRAY_SIZE(pages))
@@ -4878,6 +4887,14 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
do {
err = __nfs4_proc_set_acl(inode, buf, buflen);
trace_nfs4_set_acl(inode, err);
+ if (err == -NFS4ERR_BADOWNER || err == -NFS4ERR_BADNAME) {
+ /*
+ * no need to retry since the kernel
+ * isn't involved in encoding the ACEs.
+ */
+ err = -EINVAL;
+ break;
+ }
err = nfs4_handle_exception(NFS_SERVER(inode), err,
&exception);
} while (exception.retry);
@@ -4916,9 +4933,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
return ret;
if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL))
return -ENOENT;
- if (buflen < label.len)
- return -ERANGE;
- return 0;
+ return label.len;
}
static int nfs4_get_security_label(struct inode *inode, void *buf,
@@ -6054,6 +6069,7 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
struct nfs_inode *nfsi = NFS_I(state->inode);
+ struct nfs4_state_owner *sp = state->owner;
unsigned char fl_flags = request->fl_flags;
int status = -ENOLCK;
@@ -6068,6 +6084,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
status = do_vfs_lock(state->inode, request);
if (status < 0)
goto out;
+ mutex_lock(&sp->so_delegreturn_mutex);
down_read(&nfsi->rwsem);
if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
/* Yes: cache locks! */
@@ -6075,9 +6092,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
request->fl_flags = fl_flags & ~FL_SLEEP;
status = do_vfs_lock(state->inode, request);
up_read(&nfsi->rwsem);
+ mutex_unlock(&sp->so_delegreturn_mutex);
goto out;
}
up_read(&nfsi->rwsem);
+ mutex_unlock(&sp->so_delegreturn_mutex);
status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
out:
request->fl_flags = fl_flags;
@@ -6169,7 +6188,12 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
err = nfs4_set_lock_state(state, fl);
if (err != 0)
return err;
- err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
+ do {
+ err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
+ if (err != -NFS4ERR_DELAY)
+ break;
+ ssleep(1);
+ } while (err == -NFS4ERR_DELAY);
return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err);
}
@@ -6292,10 +6316,6 @@ static size_t nfs4_xattr_list_nfs4_acl(const struct xattr_handler *handler,
}
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
-static inline int nfs4_server_supports_labels(struct nfs_server *server)
-{
- return server->caps & NFS_CAP_SECURITY_LABEL;
-}
static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler,
struct dentry *dentry, const char *key,
@@ -6317,29 +6337,34 @@ static int nfs4_xattr_get_nfs4_label(const struct xattr_handler *handler,
return -EOPNOTSUPP;
}
-static size_t nfs4_xattr_list_nfs4_label(const struct xattr_handler *handler,
- struct dentry *dentry, char *list,
- size_t list_len, const char *name,
- size_t name_len)
+static ssize_t
+nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
{
- size_t len = 0;
+ int len = 0;
- if (nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) {
- len = security_inode_listsecurity(d_inode(dentry), NULL, 0);
- if (list && len <= list_len)
- security_inode_listsecurity(d_inode(dentry), list, len);
+ if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) {
+ len = security_inode_listsecurity(inode, list, list_len);
+ if (list_len && len > list_len)
+ return -ERANGE;
}
return len;
}
static const struct xattr_handler nfs4_xattr_nfs4_label_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .list = nfs4_xattr_list_nfs4_label,
.get = nfs4_xattr_get_nfs4_label,
.set = nfs4_xattr_set_nfs4_label,
};
-#endif
+#else
+
+static ssize_t
+nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
+{
+ return 0;
+}
+
+#endif
/*
* nfs_fhget will use either the mounted_on_fileid or the fileid
@@ -8769,6 +8794,24 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
#endif
};
+ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+ ssize_t error, error2;
+
+ error = generic_listxattr(dentry, list, size);
+ if (error < 0)
+ return error;
+ if (list) {
+ list += error;
+ size -= error;
+ }
+
+ error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size);
+ if (error2 < 0)
+ return error2;
+ return error + error2;
+}
+
static const struct inode_operations nfs4_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
@@ -8785,7 +8828,7 @@ static const struct inode_operations nfs4_dir_inode_operations = {
.setattr = nfs_setattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
- .listxattr = generic_listxattr,
+ .listxattr = nfs4_listxattr,
.removexattr = generic_removexattr,
};
@@ -8795,7 +8838,7 @@ static const struct inode_operations nfs4_file_inode_operations = {
.setattr = nfs_setattr,
.getxattr = generic_getxattr,
.setxattr = generic_setxattr,
- .listxattr = generic_listxattr,
+ .listxattr = nfs4_listxattr,
.removexattr = generic_removexattr,
};
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 15cd9db6d616..28c1b765e444 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4158,7 +4158,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
goto out_overflow;
if (len < NFS4_MAXLABELLEN) {
if (label) {
- memcpy(label->label, p, len);
+ if (label->len) {
+ if (label->len < len)
+ return -ERANGE;
+ memcpy(label->label, p, len);
+ }
label->len = len;
label->pi = pi;
label->lfs = lfs;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index f5de58c5773f..18868e318b03 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -993,17 +993,16 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
-
if (!list_empty(&mirror->pg_list)) {
int error = desc->pg_ops->pg_doio(desc);
if (error < 0)
desc->pg_error = error;
- else
+ if (list_empty(&mirror->pg_list)) {
mirror->pg_bytes_written += mirror->pg_count;
- }
- if (list_empty(&mirror->pg_list)) {
- mirror->pg_count = 0;
- mirror->pg_base = 0;
+ mirror->pg_count = 0;
+ mirror->pg_base = 0;
+ mirror->pg_recoalesce = 0;
+ }
}
}
@@ -1089,7 +1088,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
do {
list_splice_init(&mirror->pg_list, &head);
- mirror->pg_bytes_written -= mirror->pg_count;
mirror->pg_count = 0;
mirror->pg_base = 0;
mirror->pg_recoalesce = 0;
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 77d136ac8909..c21fca0dcba7 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -75,10 +75,14 @@ __state_in_grace(struct net *net, bool open)
if (!open)
return !list_empty(grace_list);
+ spin_lock(&grace_lock);
list_for_each_entry(lm, grace_list, list) {
- if (lm->block_opens)
+ if (lm->block_opens) {
+ spin_unlock(&grace_lock);
return true;
+ }
}
+ spin_unlock(&grace_lock);
return false;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index d4fa7fbc37dc..d6c443a874f2 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -821,9 +821,14 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
if (isdotent(name, namlen)) {
if (namlen == 2) {
dchild = dget_parent(dparent);
- /* filesystem root - cannot return filehandle for ".." */
+ /*
+ * Don't return filehandle for ".." if we're at
+ * the filesystem or export root:
+ */
if (dchild == dparent)
goto out;
+ if (dparent == exp->ex_path.dentry)
+ goto out;
} else
dchild = dget(dparent);
} else
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 4fa3f0ba9ab3..0a0b41071ed7 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1096,6 +1096,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
err = setup_callback_client(clp, &conn, ses);
if (err) {
nfsd4_mark_cb_down(clp, err);
+ if (c)
+ svc_xprt_put(c->cn_xprt);
return;
}
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index ee0da259a3d3..87708608c0ff 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2988,15 +2988,18 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
goto fail;
cd->rd_maxcount -= entry_bytes;
/*
- * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so
- * let's always let through the first entry, at least:
+ * RFC 3530 14.2.24 describes rd_dircount as only a "hint", and
+ * notes that it could be zero. If it is zero, then the server
+ * should enforce only the rd_maxcount value.
*/
- if (!cd->rd_dircount)
- goto fail;
- name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8;
- if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
- goto fail;
- cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
+ if (cd->rd_dircount) {
+ name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8;
+ if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
+ goto fail;
+ cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
+ if (!cd->rd_dircount)
+ cd->rd_maxcount = 0;
+ }
cd->cookie_offset = cookie_offset;
skip_entry:
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 0cd57db5c5af..dfd1949b31ea 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -768,7 +768,10 @@ out_close:
svc_xprt_put(xprt);
}
out_err:
- nfsd_destroy(net);
+ if (!list_empty(&nn->nfsd_serv->sv_permsocks))
+ nn->nfsd_serv->sv_nrthreads--;
+ else
+ nfsd_destroy(net);
return err;
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 34c22fe4eca0..d58c0c62b2ae 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2781,6 +2781,8 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
if (!nilfs->ns_writer)
return -ENOMEM;
+ inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
+
err = nilfs_segctor_start_thread(nilfs->ns_writer);
if (err) {
kfree(nilfs->ns_writer);
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index bbb0dcc35905..49a148ebbcda 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -73,11 +73,9 @@ static const struct sysfs_ops nilfs_##name##_attr_ops = { \
#define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \
static void nilfs_##name##_attr_release(struct kobject *kobj) \
{ \
- struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
- struct the_nilfs *nilfs = container_of(kobj->parent, \
- struct the_nilfs, \
- ns_##parent_name##_kobj); \
- subgroups = nilfs->ns_##parent_name##_subgroups; \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups = container_of(kobj, \
+ struct nilfs_sysfs_##parent_name##_subgroups, \
+ sg_##name##_kobj); \
complete(&subgroups->sg_##name##_kobj_unregister); \
} \
static struct kobj_type nilfs_##name##_ktype = { \
@@ -103,12 +101,12 @@ static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \
err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \
#name); \
if (err) \
- return err; \
- return 0; \
+ kobject_put(kobj); \
+ return err; \
} \
static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \
{ \
- kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
+ kobject_put(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
}
/************************************************************************
@@ -219,14 +217,14 @@ int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root)
}
if (err)
- return err;
+ kobject_put(&root->snapshot_kobj);
- return 0;
+ return err;
}
void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root)
{
- kobject_del(&root->snapshot_kobj);
+ kobject_put(&root->snapshot_kobj);
}
/************************************************************************
@@ -1008,7 +1006,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb)
err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL,
"%s", sb->s_id);
if (err)
- goto free_dev_subgroups;
+ goto cleanup_dev_kobject;
err = nilfs_sysfs_create_mounted_snapshots_group(nilfs);
if (err)
@@ -1045,9 +1043,7 @@ delete_mounted_snapshots_group:
nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
cleanup_dev_kobject:
- kobject_del(&nilfs->ns_dev_kobj);
-
-free_dev_subgroups:
+ kobject_put(&nilfs->ns_dev_kobj);
kfree(nilfs->ns_dev_subgroups);
failed_create_device_group:
@@ -1062,6 +1058,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
nilfs_sysfs_delete_superblock_group(nilfs);
nilfs_sysfs_delete_segctor_group(nilfs);
kobject_del(&nilfs->ns_dev_kobj);
+ kobject_put(&nilfs->ns_dev_kobj);
kfree(nilfs->ns_dev_subgroups);
}
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index d284f07eda77..8d4d58b12972 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -502,7 +502,7 @@ err_corrupt_attr:
}
file_name_attr = (FILE_NAME_ATTR*)((u8*)attr +
le16_to_cpu(attr->data.resident.value_offset));
- p2 = (u8*)attr + le32_to_cpu(attr->data.resident.value_length);
+ p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length);
if (p2 < (u8*)attr || p2 > p)
goto err_corrupt_attr;
/* This attribute is ok, but is it in the $Extend directory? */
@@ -661,6 +661,12 @@ static int ntfs_read_locked_inode(struct inode *vi)
}
a = ctx->attr;
/* Get the standard information attribute value. */
+ if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset)
+ + le32_to_cpu(a->data.resident.value_length) >
+ (u8 *)ctx->mrec + vol->mft_record_size) {
+ ntfs_error(vi->i_sb, "Corrupt standard information attribute in inode.");
+ goto unm_err_out;
+ }
si = (STANDARD_INFORMATION*)((u8*)a +
le16_to_cpu(a->data.resident.value_offset));
@@ -1844,6 +1850,12 @@ int ntfs_read_inode_mount(struct inode *vi)
brelse(bh);
}
+ if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
+ ntfs_error(sb, "Incorrect mft record size %u in superblock, should be %u.",
+ le32_to_cpu(m->bytes_allocated), vol->mft_record_size);
+ goto err_out;
+ }
+
/* Apply the mst fixups. */
if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) {
/* FIXME: Try to use the $MFTMirr now. */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1d738723a41a..0de92ad0ba79 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1532,6 +1532,45 @@ static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
}
}
+/*
+ * zero out partial blocks of one cluster.
+ *
+ * start: file offset where zeroing starts; it is rounded up to a block boundary.
+ * len: it will be trimmed to the end of current cluster if "start + len"
+ * is bigger than it.
+ */
+static int ocfs2_zeroout_partial_cluster(struct inode *inode,
+ u64 start, u64 len)
+{
+ int ret;
+ u64 start_block, end_block, nr_blocks;
+ u64 p_block, offset;
+ u32 cluster, p_cluster, nr_clusters;
+ struct super_block *sb = inode->i_sb;
+ u64 end = ocfs2_align_bytes_to_clusters(sb, start);
+
+ if (start + len < end)
+ end = start + len;
+
+ start_block = ocfs2_blocks_for_bytes(sb, start);
+ end_block = ocfs2_blocks_for_bytes(sb, end);
+ nr_blocks = end_block - start_block;
+ if (!nr_blocks)
+ return 0;
+
+ cluster = ocfs2_bytes_to_clusters(sb, start);
+ ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
+ &nr_clusters, NULL);
+ if (ret)
+ return ret;
+ if (!p_cluster)
+ return 0;
+
+ offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
+ p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
+ return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
+}
+
static int ocfs2_zero_partial_clusters(struct inode *inode,
u64 start, u64 len)
{
@@ -1541,6 +1580,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
unsigned int csize = osb->s_clustersize;
handle_t *handle;
+ loff_t isize = i_size_read(inode);
/*
* The "start" and "end" values are NOT necessarily part of
@@ -1561,6 +1601,26 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
goto out;
+ /* No page cache for EOF blocks, issue zero out to disk. */
+ if (end > isize) {
+ /*
+ * Zero out the EOF blocks in the last cluster starting
+ * from "isize", even when "start" > "isize". Zeroing
+ * from "start" itself is complicated: "start" may not
+ * be block aligned, so a buffered write would be
+ * needed, and buffered writes beyond EOF are not
+ * supported.
+ */
+ ret = ocfs2_zeroout_partial_cluster(inode, isize,
+ end - isize);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ if (start >= isize)
+ goto out;
+ end = isize;
+ }
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -1869,7 +1929,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
{
int ret;
s64 llen;
- loff_t size;
+ loff_t size, orig_isize;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct buffer_head *di_bh = NULL;
handle_t *handle;
@@ -1961,6 +2021,15 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
default:
ret = -EINVAL;
}
+
+ orig_isize = i_size_read(inode);
+ /* zeroout eof blocks in the cluster. */
+ if (!ret && change_size && orig_isize < size) {
+ ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
+ size - orig_isize);
+ if (!ret)
+ i_size_write(inode, size);
+ }
up_write(&OCFS2_I(inode)->ip_alloc_sem);
if (ret) {
mlog_errno(ret);
@@ -1977,9 +2046,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
goto out_inode_unlock;
}
- if (change_size && i_size_read(inode) < size)
- i_size_write(inode, size);
-
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
if (ret < 0)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 2495066a9ca3..9e7f39b17e67 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -337,8 +337,8 @@ struct ocfs2_super
spinlock_t osb_lock;
u32 s_next_generation;
unsigned long osb_flags;
- s16 s_inode_steal_slot;
- s16 s_meta_steal_slot;
+ u16 s_inode_steal_slot;
+ u16 s_meta_steal_slot;
atomic_t s_num_inodes_stolen;
atomic_t s_num_meta_stolen;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 540ab5b75dbb..5617ec167a9d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -304,7 +304,7 @@
#define OCFS2_MAX_SLOTS 255
/* Slot map indicator for an empty slot */
-#define OCFS2_INVALID_SLOT -1
+#define OCFS2_INVALID_SLOT ((u16)-1)
#define OCFS2_VOL_UUID_LEN 16
#define OCFS2_MAX_VOL_LABEL_LEN 64
@@ -340,8 +340,8 @@ struct ocfs2_system_inode_info {
enum {
BAD_BLOCK_SYSTEM_INODE = 0,
GLOBAL_INODE_ALLOC_SYSTEM_INODE,
+#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE
SLOT_MAP_SYSTEM_INODE,
-#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
HEARTBEAT_SYSTEM_INODE,
GLOBAL_BITMAP_SYSTEM_INODE,
USER_QUOTA_SYSTEM_INODE,
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index fc6d25f6d444..41a67c9b37e0 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -895,9 +895,9 @@ static void __ocfs2_set_steal_slot(struct ocfs2_super *osb, int slot, int type)
{
spin_lock(&osb->osb_lock);
if (type == INODE_ALLOC_SYSTEM_INODE)
- osb->s_inode_steal_slot = slot;
+ osb->s_inode_steal_slot = (u16)slot;
else if (type == EXTENT_ALLOC_SYSTEM_INODE)
- osb->s_meta_steal_slot = slot;
+ osb->s_meta_steal_slot = (u16)slot;
spin_unlock(&osb->osb_lock);
}
@@ -2863,9 +2863,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
goto bail;
}
- inode_alloc_inode =
- ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
- suballoc_slot);
+ if (suballoc_slot == (u16)OCFS2_INVALID_SLOT)
+ inode_alloc_inode = ocfs2_get_system_file_inode(osb,
+ GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
+ else
+ inode_alloc_inode = ocfs2_get_system_file_inode(osb,
+ INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
if (!inode_alloc_inode) {
/* the error code could be inaccurate, but we are not able to
* get the correct one. */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 4f5141350af8..337f0628c378 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -96,7 +96,7 @@ struct mount_options
unsigned long commit_interval;
unsigned long mount_opt;
unsigned int atime_quantum;
- signed short slot;
+ unsigned short slot;
int localalloc_opt;
unsigned int resv_level;
int dir_resv_level;
@@ -1372,7 +1372,7 @@ static int ocfs2_parse_options(struct super_block *sb,
goto bail;
}
if (option)
- mopt->slot = (s16)option;
+ mopt->slot = (u16)option;
break;
case Opt_commit:
option = 0;
@@ -1751,6 +1751,7 @@ static void ocfs2_inode_init_once(void *data)
oi->ip_blkno = 0ULL;
oi->ip_clusters = 0;
+ oi->ip_next_orphan = NULL;
ocfs2_resv_init_once(&oi->ip_la_data_resv);
@@ -2207,11 +2208,17 @@ static int ocfs2_initialize_super(struct super_block *sb,
}
if (ocfs2_clusterinfo_valid(osb)) {
+ /*
+ * ci_stack and ci_cluster in ocfs2_cluster_info may not be null
+ * terminated, so make sure no overflow happens here by using
+ * memcpy. Destination strings will always be null terminated
+ * because osb is allocated using kzalloc.
+ */
osb->osb_stackflags =
OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
- strlcpy(osb->osb_cluster_stack,
+ memcpy(osb->osb_cluster_stack,
OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
- OCFS2_STACK_LABEL_LEN + 1);
+ OCFS2_STACK_LABEL_LEN);
if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
mlog(ML_ERROR,
"couldn't mount because of an invalid "
@@ -2220,9 +2227,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
status = -EINVAL;
goto bail;
}
- strlcpy(osb->osb_cluster_name,
+ memcpy(osb->osb_cluster_name,
OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
- OCFS2_CLUSTER_NAME_LEN + 1);
+ OCFS2_CLUSTER_NAME_LEN);
} else {
/* The empty string is identical with classic tools that
* don't know about s_cluster_info. */
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 64c5386d0c1b..3972ac87a8cb 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -24,7 +24,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
{
ssize_t list_size, size, value_size = 0;
char *buf, *name, *value = NULL;
- int uninitialized_var(error);
+ int error = 0;
size_t slen;
if (!old->d_inode->i_op->getxattr ||
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index eedacae889b9..80bf0ab52e81 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -824,9 +824,13 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
}
} else {
new_create = true;
- if (!d_is_negative(newdentry) &&
- (!new_opaque || !ovl_is_whiteout(newdentry)))
- goto out_dput;
+ if (!d_is_negative(newdentry)) {
+ if (!new_opaque || !ovl_is_whiteout(newdentry))
+ goto out_dput;
+ } else {
+ if (flags & RENAME_EXCHANGE)
+ goto out_dput;
+ }
}
if (olddentry == trap)
diff --git a/fs/pipe.c b/fs/pipe.c
index 6534470a6c19..37a003b645ef 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -28,6 +28,21 @@
#include "internal.h"
/*
+ * New pipe buffers will be restricted to this size while the user is exceeding
+ * their pipe buffer quota. The general pipe use case needs at least two
+ * buffers: one for data yet to be read, and one for new data. If this is less
+ * than two, then a write to a non-empty pipe may block even if the pipe is not
+ * full. This can occur with GNU make jobserver or similar uses of pipes as
+ * semaphores: multiple processes may be waiting to write tokens back to the
+ * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
+ *
+ * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
+ * own risk, namely: pipe writes to non-full pipes may block until the pipe is
+ * emptied.
+ */
+#define PIPE_MIN_DEF_BUFFERS 2
+
+/*
* The max size that a non-root user is allowed to grow the pipe. Can
* be set by root in /proc/sys/fs/pipe-max-size
*/
@@ -621,7 +636,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
if (!too_many_pipe_buffers_hard(user)) {
if (too_many_pipe_buffers_soft(user))
- pipe_bufs = 1;
+ pipe_bufs = PIPE_MIN_DEF_BUFFERS;
pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL);
}
diff --git a/fs/pnode.c b/fs/pnode.c
index d15c63e97ef1..64e9a401d67d 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -268,14 +268,13 @@ static int propagate_one(struct mount *m)
if (IS_ERR(child))
return PTR_ERR(child);
child->mnt.mnt_flags &= ~MNT_LOCKED;
+ read_seqlock_excl(&mount_lock);
mnt_set_mountpoint(m, mp, child);
+ if (m->mnt_master != dest_master)
+ SET_MNT_MARK(m->mnt_master);
+ read_sequnlock_excl(&mount_lock);
last_dest = m;
last_source = child;
- if (m->mnt_master != dest_master) {
- read_seqlock_excl(&mount_lock);
- SET_MNT_MARK(m->mnt_master);
- read_sequnlock_excl(&mount_lock);
- }
hlist_add_head(&child->mnt_hash, list);
return count_mounts(m->mnt_ns, child);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index bd8c26a409a7..4d68f5a9e4aa 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -887,7 +887,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
flags |= FOLL_WRITE;
while (count > 0) {
- int this_len = min_t(int, count, PAGE_SIZE);
+ size_t this_len = min_t(size_t, count, PAGE_SIZE);
if (write && copy_from_user(page, buf, this_len)) {
copied = -EFAULT;
@@ -2384,6 +2384,13 @@ out:
}
#ifdef CONFIG_SECURITY
+static int proc_pid_attr_open(struct inode *inode, struct file *file)
+{
+ file->private_data = NULL;
+ __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+ return 0;
+}
+
static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
{
@@ -2413,6 +2420,14 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
ssize_t length;
struct task_struct *task = get_proc_task(inode);
+ /* A task may only write when it was the opener. */
+ if (file->private_data != current->mm) {
+ /* Drop the reference obtained by get_proc_task() above. */
+ if (task)
+ put_task_struct(task);
+ return -EPERM;
+ }
+
length = -ESRCH;
if (!task)
goto out_no_task;
@@ -2451,9 +2466,11 @@ out_no_task:
}
static const struct file_operations proc_pid_attr_operations = {
+ .open = proc_pid_attr_open,
.read = proc_pid_attr_read,
.write = proc_pid_attr_write,
.llseek = generic_file_llseek,
+ .release = mem_release,
};
static const struct pid_entry attr_dir_stuff[] = {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index bd95b9fdebb0..82140dbc03b7 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -415,7 +415,7 @@ const struct inode_operations proc_link_inode_operations = {
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{
- struct inode *inode = new_inode_pseudo(sb);
+ struct inode *inode = new_inode(sb);
if (inode) {
inode->i_ino = de->low_ino;
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 113b8d061fc0..dffbe533d53f 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -24,6 +24,13 @@ static const char *proc_self_follow_link(struct dentry *dentry, void **cookie)
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
+ /*
+ * Not currently supported. Once we can inherit all of struct pid,
+ * we can allow this.
+ */
+ if (current->flags & PF_KTHREAD)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (!tgid)
return ERR_PTR(-ENOENT);
/* 11 for max length of signed int in decimal + NULL term */
@@ -51,7 +58,7 @@ int proc_setup_self(struct super_block *s)
mutex_lock(&root_inode->i_mutex);
self = d_alloc_name(s->s_root, "self");
if (self) {
- struct inode *inode = new_inode_pseudo(s);
+ struct inode *inode = new_inode(s);
if (inode) {
inode->i_ino = self_inum;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 947b0f4fd0a1..4b186aac3011 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -52,7 +52,7 @@ int proc_setup_thread_self(struct super_block *s)
mutex_lock(&root_inode->i_mutex);
thread_self = d_alloc_name(s->s_root, "thread-self");
if (thread_self) {
- struct inode *inode = new_inode_pseudo(s);
+ struct inode *inode = new_inode(s);
if (inode) {
inode->i_ino = thread_self_inum;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index b218f965817b..613cc38c9efa 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -14,13 +14,48 @@
#include <linux/buffer_head.h>
#include "qnx4.h"
+/*
+ * A qnx4 directory entry is an inode entry or link info
+ * depending on the status field in the last byte. The
+ * first byte is where the name starts either way, and a
+ * zero means it's empty.
+ *
+ * Also, due to a bug in gcc, we don't want to use the
+ * real (differently sized) name arrays in the inode and
+ * link entries, but always the 'de_name[]' one in the
+ * fake struct entry.
+ *
+ * See
+ *
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578#c6
+ *
+ * for details, but basically gcc will take the size of the
+ * 'name' array from one of the used union entries randomly.
+ *
+ * This use of 'de_name[]' (48 bytes) avoids the false positive
+ * warnings that would happen if gcc decides to use 'inode.di_name'
+ * (16 bytes) even when the pointer and size were to come from
+ * 'link.dl_name' (48 bytes).
+ *
+ * In all cases the actual name pointer itself is the same, it's
+ * only the gcc internal 'what is the size of this field' logic
+ * that can get confused.
+ */
+union qnx4_directory_entry {
+ struct {
+ const char de_name[48];
+ u8 de_pad[15];
+ u8 de_status;
+ };
+ struct qnx4_inode_entry inode;
+ struct qnx4_link_info link;
+};
+
static int qnx4_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
unsigned int offset;
struct buffer_head *bh;
- struct qnx4_inode_entry *de;
- struct qnx4_link_info *le;
unsigned long blknum;
int ix, ino;
int size;
@@ -37,27 +72,27 @@ static int qnx4_readdir(struct file *file, struct dir_context *ctx)
}
ix = (ctx->pos >> QNX4_DIR_ENTRY_SIZE_BITS) % QNX4_INODES_PER_BLOCK;
for (; ix < QNX4_INODES_PER_BLOCK; ix++, ctx->pos += QNX4_DIR_ENTRY_SIZE) {
+ union qnx4_directory_entry *de;
+
offset = ix * QNX4_DIR_ENTRY_SIZE;
- de = (struct qnx4_inode_entry *) (bh->b_data + offset);
- if (!de->di_fname[0])
+ de = (union qnx4_directory_entry *) (bh->b_data + offset);
+
+ if (!de->de_name[0])
continue;
- if (!(de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK)))
+ if (!(de->de_status & (QNX4_FILE_USED|QNX4_FILE_LINK)))
continue;
- if (!(de->di_status & QNX4_FILE_LINK))
- size = QNX4_SHORT_NAME_MAX;
- else
- size = QNX4_NAME_MAX;
- size = strnlen(de->di_fname, size);
- QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname));
- if (!(de->di_status & QNX4_FILE_LINK))
+ if (!(de->de_status & QNX4_FILE_LINK)) {
+ size = sizeof(de->inode.di_fname);
ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1;
- else {
- le = (struct qnx4_link_info*)de;
- ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) *
+ } else {
+ size = sizeof(de->link.dl_fname);
+ ino = ( le32_to_cpu(de->link.dl_inode_blk) - 1 ) *
QNX4_INODES_PER_BLOCK +
- le->dl_inode_ndx;
+ de->link.dl_inode_ndx;
}
- if (!dir_emit(ctx, de->di_fname, size, ino, DT_UNKNOWN)) {
+ size = strnlen(de->de_name, size);
+ QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->de_name));
+ if (!dir_emit(ctx, de->de_name, size, ino, DT_UNKNOWN)) {
brelse(bh);
return 0;
}
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 58efb83dec1c..3069b1186719 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -55,7 +55,7 @@ static ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
memset(buf, 0, info->dqi_usable_bs);
return sb->s_op->quota_read(sb, info->dqi_type, buf,
- info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+ info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits);
}
static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
@@ -64,7 +64,7 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
ssize_t ret;
ret = sb->s_op->quota_write(sb, info->dqi_type, buf,
- info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+ info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits);
if (ret != info->dqi_usable_bs) {
quota_error(sb, "dquota write failed");
if (ret >= 0)
@@ -277,7 +277,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
blk);
goto out_buf;
}
- dquot->dq_off = (blk << info->dqi_blocksize_bits) +
+ dquot->dq_off = ((loff_t)blk << info->dqi_blocksize_bits) +
sizeof(struct qt_disk_dqdbheader) +
i * info->dqi_entry_size;
kfree(buf);
@@ -552,7 +552,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
ret = -EIO;
goto out_buf;
} else {
- ret = (blk << info->dqi_blocksize_bits) + sizeof(struct
+ ret = ((loff_t)blk << info->dqi_blocksize_bits) + sizeof(struct
qt_disk_dqdbheader) + i * info->dqi_entry_size;
}
out_buf:
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 2aa012a68e90..9891b8fb0432 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -266,6 +266,7 @@ static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot)
d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
d->dqb_btime = cpu_to_le64(m->dqb_btime);
d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id));
+ d->dqb_pad = 0;
if (qtree_entry_unused(info, dp))
d->dqb_itime = cpu_to_le64(1);
}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 60ba35087d12..ccbb15ab029f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1553,11 +1553,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
* set version 1, version 2 could be used too, because stat data
* key is the same in both versions
*/
- key.version = KEY_FORMAT_3_5;
- key.on_disk_key.k_dir_id = dirino;
- key.on_disk_key.k_objectid = inode->i_ino;
- key.on_disk_key.k_offset = 0;
- key.on_disk_key.k_type = 0;
+ _make_cpu_key(&key, KEY_FORMAT_3_5, dirino, inode->i_ino, 0, 0, 3);
/* look for the object's stat data */
retval = search_item(inode->i_sb, &key, &path_to_sd);
@@ -2161,7 +2157,8 @@ out_end_trans:
out_inserted_sd:
clear_nlink(inode);
th->t_trans_id = 0; /* so the caller can't use this handle later */
- unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
+ if (inode->i_state & I_NEW)
+ unlock_new_inode(inode);
iput(inode);
return err;
}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 00985f9db9f7..6a0fa0cdc1ed 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2770,6 +2770,20 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
goto free_and_return;
}
+ /*
+ * Sanity check to see if journal first block is correct.
+ * If journal first block is invalid it can cause
+ * zeroing important superblock members.
+ */
+ if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
+ SB_ONDISK_JOURNAL_1st_BLOCK(sb) < SB_JOURNAL_1st_RESERVED_BLOCK(sb)) {
+ reiserfs_warning(sb, "journal-1393",
+ "journal 1st super block is invalid: 1st reserved block %d, but actual 1st block is %d",
+ SB_JOURNAL_1st_RESERVED_BLOCK(sb),
+ SB_ONDISK_JOURNAL_1st_BLOCK(sb));
+ goto free_and_return;
+ }
+
if (journal_init_dev(sb, journal, j_dev_name) != 0) {
reiserfs_warning(sb, "sh-462",
"unable to initialize journal device");
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index e3a4cbad9620..13322c39e6cc 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -386,6 +386,24 @@ void pathrelse(struct treepath *search_path)
search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
}
+static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih)
+{
+ struct reiserfs_de_head *deh;
+ int i;
+
+ deh = B_I_DEH(bh, ih);
+ for (i = 0; i < ih_entry_count(ih); i++) {
+ if (deh_location(&deh[i]) > ih_item_len(ih)) {
+ reiserfs_warning(NULL, "reiserfs-5094",
+ "directory entry location seems wrong %h",
+ &deh[i]);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
{
struct block_head *blkh;
@@ -453,6 +471,15 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
"(second one): %h", ih);
return 0;
}
+ if (is_direntry_le_ih(ih)) {
+ if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) {
+ reiserfs_warning(NULL, "reiserfs-5093",
+ "item entry count seems wrong %h",
+ ih);
+ return 0;
+ }
+ return has_valid_deh_location(bh, ih);
+ }
prev_location = ih_location(ih);
}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f9796fd51531..2ffcbe451202 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1232,6 +1232,10 @@ static int reiserfs_parse_options(struct super_block *s,
"turned on.");
return 0;
}
+ if (qf_names[qtype] !=
+ REISERFS_SB(s)->s_qf_names[qtype])
+ kfree(qf_names[qtype]);
+ qf_names[qtype] = NULL;
if (*arg) { /* Some filename specified? */
if (REISERFS_SB(s)->s_qf_names[qtype]
&& strcmp(REISERFS_SB(s)->s_qf_names[qtype],
@@ -1261,10 +1265,6 @@ static int reiserfs_parse_options(struct super_block *s,
else
*mount_options |= 1 << REISERFS_GRPQUOTA;
} else {
- if (qf_names[qtype] !=
- REISERFS_SB(s)->s_qf_names[qtype])
- kfree(qf_names[qtype]);
- qf_names[qtype] = NULL;
if (qtype == USRQUOTA)
*mount_options &= ~(1 << REISERFS_USRQUOTA);
else
@@ -2050,6 +2050,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
unlock_new_inode(root_inode);
}
+ if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) ||
+ !root_inode->i_size) {
+ SWARN(silent, s, "", "corrupt root inode, run fsck");
+ iput(root_inode);
+ errval = -EUCLEAN;
+ goto error;
+ }
+
s->s_root = d_make_root(root_inode);
if (!s->s_root)
goto error;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index d424b3d4bf3b..92d39cbc2d64 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -656,6 +656,13 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
if (get_inode_sd_version(inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
+ /*
+ * priv_root needn't be initialized during mount so allow initial
+ * lookups to succeed.
+ */
+ if (!REISERFS_SB(inode->i_sb)->priv_root)
+ return 0;
+
dentry = xattr_lookup(inode, name, XATTR_REPLACE);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
index 613ff5aef94e..19ca3745301f 100644
--- a/fs/reiserfs/xattr.h
+++ b/fs/reiserfs/xattr.h
@@ -42,7 +42,7 @@ void reiserfs_security_free(struct reiserfs_security_handle *sec);
static inline int reiserfs_xattrs_initialized(struct super_block *sb)
{
- return REISERFS_SB(sb)->priv_root != NULL;
+ return REISERFS_SB(sb)->priv_root && REISERFS_SB(sb)->xattr_root;
}
#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header))
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c
index f86f51f99ace..1dcadd22b440 100644
--- a/fs/romfs/storage.c
+++ b/fs/romfs/storage.c
@@ -221,10 +221,8 @@ int romfs_dev_read(struct super_block *sb, unsigned long pos,
size_t limit;
limit = romfs_maxsize(sb);
- if (pos >= limit)
+ if (pos >= limit || buflen > limit - pos)
return -EIO;
- if (buflen > limit - pos)
- buflen = limit - pos;
#ifdef CONFIG_ROMFS_ON_MTD
if (sb->s_mtd)
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 6dc4296eed62..95e730506ad2 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -14,6 +14,7 @@
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/string_helpers.h>
+#include <linux/pagemap.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -28,6 +29,9 @@ static void *seq_buf_alloc(unsigned long size)
void *buf;
gfp_t gfp = GFP_KERNEL;
+ if (unlikely(size > MAX_RW_COUNT))
+ return NULL;
+
/*
* For high order allocations, use __GFP_NORETRY to avoid oom-killing -
* it's better to fall back to vmalloc() than to kill things. For small
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c
index 8073b6532cf0..1d406a2094a5 100644
--- a/fs/squashfs/export.c
+++ b/fs/squashfs/export.c
@@ -54,12 +54,17 @@ static long long squashfs_inode_lookup(struct super_block *sb, int ino_num)
struct squashfs_sb_info *msblk = sb->s_fs_info;
int blk = SQUASHFS_LOOKUP_BLOCK(ino_num - 1);
int offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino_num - 1);
- u64 start = le64_to_cpu(msblk->inode_lookup_table[blk]);
+ u64 start;
__le64 ino;
int err;
TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino_num);
+ if (ino_num == 0 || (ino_num - 1) >= msblk->inodes)
+ return -EINVAL;
+
+ start = le64_to_cpu(msblk->inode_lookup_table[blk]);
+
err = squashfs_read_metadata(sb, &ino, &start, &offset, sizeof(ino));
if (err < 0)
return err;
@@ -124,7 +129,10 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
u64 lookup_table_start, u64 next_table, unsigned int inodes)
{
unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes);
+ unsigned int indexes = SQUASHFS_LOOKUP_BLOCKS(inodes);
+ int n;
__le64 *table;
+ u64 start, end;
TRACE("In read_inode_lookup_table, length %d\n", length);
@@ -134,20 +142,41 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
if (inodes == 0)
return ERR_PTR(-EINVAL);
- /* length bytes should not extend into the next table - this check
- * also traps instances where lookup_table_start is incorrectly larger
- * than the next table start
+ /*
+ * The computed size of the lookup table (length bytes) should exactly
+ * match the table start and end points
*/
- if (lookup_table_start + length > next_table)
+ if (length != (next_table - lookup_table_start))
return ERR_PTR(-EINVAL);
table = squashfs_read_table(sb, lookup_table_start, length);
+ if (IS_ERR(table))
+ return table;
/*
- * table[0] points to the first inode lookup table metadata block,
- * this should be less than lookup_table_start
+ * table[0], table[1], ... table[indexes - 1] store the locations
+ * of the compressed inode lookup blocks. Each entry should be
+ * less than the next (i.e. table[0] < table[1]), and the difference
+ * between them should be SQUASHFS_METADATA_SIZE or less.
+ * table[indexes - 1] should be less than lookup_table_start, and
+ * again the difference should be SQUASHFS_METADATA_SIZE or less
*/
- if (!IS_ERR(table) && le64_to_cpu(table[0]) >= lookup_table_start) {
+ for (n = 0; n < (indexes - 1); n++) {
+ start = le64_to_cpu(table[n]);
+ end = le64_to_cpu(table[n + 1]);
+
+ if (start >= end
+ || (end - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ start = le64_to_cpu(table[indexes - 1]);
+ if (start >= lookup_table_start ||
+ (lookup_table_start - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
kfree(table);
return ERR_PTR(-EINVAL);
}
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 1ec7bae2751d..979da17cbbf3 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -224,11 +224,11 @@ failure:
* If the skip factor is limited in this way then the file will use multiple
* slots.
*/
-static inline int calculate_skip(int blocks)
+static inline int calculate_skip(u64 blocks)
{
- int skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
+ u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
* SQUASHFS_META_INDEXES);
- return min(SQUASHFS_CACHED_BLKS - 1, skip + 1);
+ return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1);
}
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
index d38ea3dab951..d2e15baab537 100644
--- a/fs/squashfs/id.c
+++ b/fs/squashfs/id.c
@@ -48,10 +48,15 @@ int squashfs_get_id(struct super_block *sb, unsigned int index,
struct squashfs_sb_info *msblk = sb->s_fs_info;
int block = SQUASHFS_ID_BLOCK(index);
int offset = SQUASHFS_ID_BLOCK_OFFSET(index);
- u64 start_block = le64_to_cpu(msblk->id_table[block]);
+ u64 start_block;
__le32 disk_id;
int err;
+ if (index >= msblk->ids)
+ return -EINVAL;
+
+ start_block = le64_to_cpu(msblk->id_table[block]);
+
err = squashfs_read_metadata(sb, &disk_id, &start_block, &offset,
sizeof(disk_id));
if (err < 0)
@@ -69,7 +74,10 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb,
u64 id_table_start, u64 next_table, unsigned short no_ids)
{
unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids);
+ unsigned int indexes = SQUASHFS_ID_BLOCKS(no_ids);
+ int n;
__le64 *table;
+ u64 start, end;
TRACE("In read_id_index_table, length %d\n", length);
@@ -80,20 +88,38 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb,
return ERR_PTR(-EINVAL);
/*
- * length bytes should not extend into the next table - this check
- * also traps instances where id_table_start is incorrectly larger
- * than the next table start
+ * The computed size of the index table (length bytes) should exactly
+ * match the table start and end points
*/
- if (id_table_start + length > next_table)
+ if (length != (next_table - id_table_start))
return ERR_PTR(-EINVAL);
table = squashfs_read_table(sb, id_table_start, length);
+ if (IS_ERR(table))
+ return table;
/*
- * table[0] points to the first id lookup table metadata block, this
- * should be less than id_table_start
+ * table[0], table[1], ... table[indexes - 1] store the locations
+ * of the compressed id blocks. Each entry should be less than
+ * the next (i.e. table[0] < table[1]), and the difference between them
+ * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1]
+ * should be less than id_table_start, and again the difference
+ * should be SQUASHFS_METADATA_SIZE or less
*/
- if (!IS_ERR(table) && le64_to_cpu(table[0]) >= id_table_start) {
+ for (n = 0; n < (indexes - 1); n++) {
+ start = le64_to_cpu(table[n]);
+ end = le64_to_cpu(table[n + 1]);
+
+ if (start >= end || (end - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ start = le64_to_cpu(table[indexes - 1]);
+ if (start >= id_table_start || (id_table_start - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
kfree(table);
return ERR_PTR(-EINVAL);
}
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index e66486366f02..2fd1262cc1bd 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -30,6 +30,7 @@
/* size of metadata (inode and directory) blocks */
#define SQUASHFS_METADATA_SIZE 8192
+#define SQUASHFS_BLOCK_OFFSET 2
/* default size of block device I/O */
#ifdef CONFIG_SQUASHFS_4K_DEVBLK_SIZE
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index ef69c31947bf..5234c19a0eab 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -77,5 +77,6 @@ struct squashfs_sb_info {
unsigned int inodes;
unsigned int fragments;
int xattr_ids;
+ unsigned int ids;
};
#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 93aa3e23c845..44500dcf1805 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -177,6 +177,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
msblk->inodes = le32_to_cpu(sblk->inodes);
msblk->fragments = le32_to_cpu(sblk->fragments);
+ msblk->ids = le16_to_cpu(sblk->no_ids);
flags = le16_to_cpu(sblk->flags);
TRACE("Found valid superblock on %s\n", bdevname(sb->s_bdev, b));
@@ -188,7 +189,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
TRACE("Block size %d\n", msblk->block_size);
TRACE("Number of inodes %d\n", msblk->inodes);
TRACE("Number of fragments %d\n", msblk->fragments);
- TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids));
+ TRACE("Number of ids %d\n", msblk->ids);
TRACE("sblk->inode_table_start %llx\n", msblk->inode_table);
TRACE("sblk->directory_table_start %llx\n", msblk->directory_table);
TRACE("sblk->fragment_table_start %llx\n",
@@ -245,8 +246,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
allocate_id_index_table:
/* Allocate and read id index table */
msblk->id_table = squashfs_read_id_index_table(sb,
- le64_to_cpu(sblk->id_table_start), next_table,
- le16_to_cpu(sblk->no_ids));
+ le64_to_cpu(sblk->id_table_start), next_table, msblk->ids);
if (IS_ERR(msblk->id_table)) {
ERROR("unable to read id index table\n");
err = PTR_ERR(msblk->id_table);
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index c83f5d9ec125..30b3aaa08b62 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -30,8 +30,16 @@ extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
u64 start, u64 *xattr_table_start, int *xattr_ids)
{
+ struct squashfs_xattr_id_table *id_table;
+
+ id_table = squashfs_read_table(sb, start, sizeof(*id_table));
+ if (IS_ERR(id_table))
+ return (__le64 *) id_table;
+
+ *xattr_table_start = le64_to_cpu(id_table->xattr_table_start);
+ kfree(id_table);
+
ERROR("Xattrs in filesystem, these will be ignored\n");
- *xattr_table_start = start;
return ERR_PTR(-ENOTSUPP);
}
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index c89607d690c4..7f718d2bf357 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -44,10 +44,15 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
struct squashfs_sb_info *msblk = sb->s_fs_info;
int block = SQUASHFS_XATTR_BLOCK(index);
int offset = SQUASHFS_XATTR_BLOCK_OFFSET(index);
- u64 start_block = le64_to_cpu(msblk->xattr_id_table[block]);
+ u64 start_block;
struct squashfs_xattr_id id;
int err;
+ if (index >= msblk->xattr_ids)
+ return -EINVAL;
+
+ start_block = le64_to_cpu(msblk->xattr_id_table[block]);
+
err = squashfs_read_metadata(sb, &id, &start_block, &offset,
sizeof(id));
if (err < 0)
@@ -63,13 +68,17 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
/*
* Read uncompressed xattr id lookup table indexes from disk into memory
*/
-__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start,
+__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
u64 *xattr_table_start, int *xattr_ids)
{
- unsigned int len;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ unsigned int len, indexes;
struct squashfs_xattr_id_table *id_table;
+ __le64 *table;
+ u64 start, end;
+ int n;
- id_table = squashfs_read_table(sb, start, sizeof(*id_table));
+ id_table = squashfs_read_table(sb, table_start, sizeof(*id_table));
if (IS_ERR(id_table))
return (__le64 *) id_table;
@@ -83,13 +92,54 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start,
if (*xattr_ids == 0)
return ERR_PTR(-EINVAL);
- /* xattr_table should be less than start */
- if (*xattr_table_start >= start)
+ len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
+ indexes = SQUASHFS_XATTR_BLOCKS(*xattr_ids);
+
+ /*
+ * The computed size of the index table (len bytes) should exactly
+ * match the table start and end points
+ */
+ start = table_start + sizeof(*id_table);
+ end = msblk->bytes_used;
+
+ if (len != (end - start))
return ERR_PTR(-EINVAL);
- len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
+ table = squashfs_read_table(sb, start, len);
+ if (IS_ERR(table))
+ return table;
+
+ /* table[0], table[1], ... table[indexes - 1] store the locations
+ * of the compressed xattr id blocks. Each entry should be less than
+ * the next (i.e. table[0] < table[1]), and the difference between them
+ * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1]
+ * should be less than table_start, and again the difference
+ * should be SQUASHFS_METADATA_SIZE or less.
+ *
+ * Finally xattr_table_start should be less than table[0].
+ */
+ for (n = 0; n < (indexes - 1); n++) {
+ start = le64_to_cpu(table[n]);
+ end = le64_to_cpu(table[n + 1]);
+
+ if (start >= end || (end - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ start = le64_to_cpu(table[indexes - 1]);
+ if (start >= table_start || (table_start - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
- TRACE("In read_xattr_index_table, length %d\n", len);
+ if (*xattr_table_start >= le64_to_cpu(table[0])) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
- return squashfs_read_table(sb, start + sizeof(*id_table), len);
+ return table;
}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 666986b95c5d..300cdbdc8494 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -17,6 +17,7 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/seq_file.h>
+#include <linux/mm.h>
#include "sysfs.h"
#include "../kernfs/kernfs-internal.h"
@@ -549,3 +550,57 @@ void sysfs_remove_bin_file(struct kobject *kobj,
kernfs_remove_by_name(kobj->sd, attr->attr.name);
}
EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
+
+/**
+ * sysfs_emit - scnprintf equivalent, aware of PAGE_SIZE buffer.
+ * @buf: start of PAGE_SIZE buffer.
+ * @fmt: format
+ * @...: optional arguments to @fmt
+ *
+ *
+ * Returns number of characters written to @buf.
+ */
+int sysfs_emit(char *buf, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+
+ if (WARN(!buf || offset_in_page(buf),
+ "invalid sysfs_emit: buf:%p\n", buf))
+ return 0;
+
+ va_start(args, fmt);
+ len = vscnprintf(buf, PAGE_SIZE, fmt, args);
+ va_end(args);
+
+ return len;
+}
+EXPORT_SYMBOL_GPL(sysfs_emit);
+
+/**
+ * sysfs_emit_at - scnprintf equivalent, aware of PAGE_SIZE buffer.
+ * @buf: start of PAGE_SIZE buffer.
+ * @at: offset in @buf to start write in bytes
+ * @at must be >= 0 && < PAGE_SIZE
+ * @fmt: format
+ * @...: optional arguments to @fmt
+ *
+ *
+ * Returns number of characters written starting at &@buf[@at].
+ */
+int sysfs_emit_at(char *buf, int at, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+
+ if (WARN(!buf || offset_in_page(buf) || at < 0 || at >= PAGE_SIZE,
+ "invalid sysfs_emit_at: buf:%p at:%d\n", buf, at))
+ return 0;
+
+ va_start(args, fmt);
+ len = vscnprintf(buf + at, PAGE_SIZE - at, fmt, args);
+ va_end(args);
+
+ return len;
+}
+EXPORT_SYMBOL_GPL(sysfs_emit_at);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 1327a02ec778..ab8dd1538381 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -400,6 +400,11 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
clockid != CLOCK_BOOTTIME_ALARM))
return -EINVAL;
+ if (!capable(CAP_WAKE_ALARM) &&
+ (clockid == CLOCK_REALTIME_ALARM ||
+ clockid == CLOCK_BOOTTIME_ALARM))
+ return -EPERM;
+
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
@@ -444,6 +449,11 @@ static int do_timerfd_settime(int ufd, int flags,
return ret;
ctx = f.file->private_data;
+ if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) {
+ fdput(f);
+ return -EPERM;
+ }
+
timerfd_setup_cancel(ctx, flags);
/*
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 595ca0debe11..09134a13a39c 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -1125,6 +1125,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
err = PTR_ERR(dent);
if (err == -ENOENT)
break;
+ kfree(pdent);
return err;
}
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 97be41215332..99caaae01cab 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -237,7 +237,7 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
int offs, int quiet, int must_chk_crc)
{
- int err = -EINVAL, type, node_len;
+ int err = -EINVAL, type, node_len, dump_node = 1;
uint32_t crc, node_crc, magic;
const struct ubifs_ch *ch = buf;
@@ -290,10 +290,22 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
out_len:
if (!quiet)
ubifs_err(c, "bad node length %d", node_len);
+ if (type == UBIFS_DATA_NODE && node_len > UBIFS_DATA_NODE_SZ)
+ dump_node = 0;
out:
if (!quiet) {
ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
- ubifs_dump_node(c, buf);
+ if (dump_node) {
+ ubifs_dump_node(c, buf);
+ } else {
+ int safe_len = min3(node_len, c->leb_size - offs,
+ (int)UBIFS_MAX_DATA_NODE_SZ);
+ pr_err("\tprevent out-of-bounds memory access\n");
+ pr_err("\ttruncated data node length %d\n", safe_len);
+ pr_err("\tcorrupted data node:\n");
+ print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
+ buf, safe_len, 0);
+ }
dump_stack();
}
return err;
@@ -319,7 +331,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
{
uint32_t crc;
- ubifs_assert(pad >= 0 && !(pad & 7));
+ ubifs_assert(pad >= 0);
if (pad >= UBIFS_PAD_NODE_SZ) {
struct ubifs_ch *ch = buf;
@@ -709,6 +721,10 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
* write-buffer.
*/
memcpy(wbuf->buf + wbuf->used, buf, len);
+ if (aligned_len > len) {
+ ubifs_assert(aligned_len - len < 8);
+ ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len);
+ }
if (aligned_len == wbuf->avail) {
dbg_io("flush jhead %s wbuf to LEB %d:%d",
@@ -801,13 +817,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
}
spin_lock(&wbuf->lock);
- if (aligned_len)
+ if (aligned_len) {
/*
* And now we have what's left and what does not take whole
* max. write unit, so write it to the write-buffer and we are
* done.
*/
memcpy(wbuf->buf, buf + written, len);
+ if (aligned_len > len) {
+ ubifs_assert(aligned_len - len < 8);
+ ubifs_pad(c, wbuf->buf + len, aligned_len - len);
+ }
+ }
if (c->leb_size - wbuf->offs >= c->max_write_size)
wbuf->size = c->max_write_size;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 3876448ec0dc..2c39c1c81196 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -140,21 +140,24 @@ void udf_evict_inode(struct inode *inode)
struct udf_inode_info *iinfo = UDF_I(inode);
int want_delete = 0;
- if (!inode->i_nlink && !is_bad_inode(inode)) {
- want_delete = 1;
- udf_setsize(inode, 0);
- udf_update_inode(inode, IS_SYNC(inode));
+ if (!is_bad_inode(inode)) {
+ if (!inode->i_nlink) {
+ want_delete = 1;
+ udf_setsize(inode, 0);
+ udf_update_inode(inode, IS_SYNC(inode));
+ }
+ if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
+ inode->i_size != iinfo->i_lenExtents) {
+ udf_warn(inode->i_sb,
+ "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
+ inode->i_ino, inode->i_mode,
+ (unsigned long long)inode->i_size,
+ (unsigned long long)iinfo->i_lenExtents);
+ }
}
truncate_inode_pages_final(&inode->i_data);
invalidate_inode_buffers(inode);
clear_inode(inode);
- if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
- inode->i_size != iinfo->i_lenExtents) {
- udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
- inode->i_ino, inode->i_mode,
- (unsigned long long)inode->i_size,
- (unsigned long long)iinfo->i_lenExtents);
- }
kfree(iinfo->i_ext.i_data);
iinfo->i_ext.i_data = NULL;
udf_clear_extent_cache(inode);
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 71d1c25f360d..8c7f9ea251e5 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -175,13 +175,22 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type,
else
offset = le32_to_cpu(eahd->appAttrLocation);
- while (offset < iinfo->i_lenEAttr) {
+ while (offset + sizeof(*gaf) < iinfo->i_lenEAttr) {
+ uint32_t attrLength;
+
gaf = (struct genericFormat *)&ea[offset];
+ attrLength = le32_to_cpu(gaf->attrLength);
+
+ /* Detect undersized elements and buffer overflows */
+ if ((attrLength < sizeof(*gaf)) ||
+ (attrLength > (iinfo->i_lenEAttr - offset)))
+ break;
+
if (le32_to_cpu(gaf->attrType) == type &&
gaf->attrSubtype == subtype)
return gaf;
else
- offset += le32_to_cpu(gaf->attrLength);
+ offset += attrLength;
}
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index f34c545f4e54..074560ad190e 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -945,6 +945,10 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
iinfo->i_location.partitionReferenceNum,
0);
epos.bh = udf_tgetblk(sb, block);
+ if (unlikely(!epos.bh)) {
+ err = -ENOMEM;
+ goto out_no_entry;
+ }
lock_buffer(epos.bh);
memset(epos.bh->b_data, 0x00, bsize);
set_buffer_uptodate(epos.bh);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 159977ec8e54..710f1b8fad9b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1390,6 +1390,12 @@ static int udf_load_sparable_map(struct super_block *sb,
(int)spm->numSparingTables);
return -EIO;
}
+ if (le32_to_cpu(spm->sizeSparingTable) > sb->s_blocksize) {
+ udf_err(sb, "error loading logical volume descriptor: "
+ "Too big sparing table size (%u)\n",
+ le32_to_cpu(spm->sizeSparingTable));
+ return -EIO;
+ }
for (i = 0; i < spm->numSparingTables; i++) {
loc = le32_to_cpu(spm->locSparingTable[i]);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 10f364490833..be68b48de1cc 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -99,7 +99,7 @@ static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 gene
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct inode *inode;
- if (ino < UFS_ROOTINO || ino > uspi->s_ncg * uspi->s_ipg)
+ if (ino < UFS_ROOTINO || ino > (u64)uspi->s_ncg * uspi->s_ipg)
return ERR_PTR(-ESTALE);
inode = ufs_iget(sb, ino);
diff --git a/fs/xattr.c b/fs/xattr.c
index 09441c396798..5ba5565609ee 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -735,6 +735,8 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
if (!buffer) {
for_each_xattr_handler(handlers, handler) {
+ if (!handler->list)
+ continue;
size += handler->list(handler, dentry, NULL, 0,
NULL, 0);
}
@@ -742,6 +744,8 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
char *buf = buffer;
for_each_xattr_handler(handlers, handler) {
+ if (!handler->list)
+ continue;
size = handler->list(handler, dentry, buf, buffer_size,
NULL, 0);
if (size > buffer_size)
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 01a5ecfedfcf..4539ff4d351f 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -514,8 +514,8 @@ xfs_attr_shortform_create(xfs_da_args_t *args)
ASSERT(ifp->if_flags & XFS_IFINLINE);
}
xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK);
- hdr = (xfs_attr_sf_hdr_t *)ifp->if_u1.if_data;
- hdr->count = 0;
+ hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data;
+ memset(hdr, 0, sizeof(*hdr));
hdr->totsize = cpu_to_be16(sizeof(*hdr));
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
}
@@ -779,9 +779,8 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
ASSERT(blkno == 0);
error = xfs_attr3_leaf_create(args, blkno, &bp);
if (error) {
- error = xfs_da_shrink_inode(args, 0, bp);
- bp = NULL;
- if (error)
+ /* xfs_attr3_leaf_create may not have instantiated a block */
+ if (bp && (xfs_da_shrink_inode(args, 0, bp) != 0))
goto out;
xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */
memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */
@@ -1327,7 +1326,9 @@ xfs_attr3_leaf_add_work(
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
if (ichdr->freemap[i].base == tmp) {
ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t);
- ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t);
+ ichdr->freemap[i].size -=
+ min_t(uint16_t, ichdr->freemap[i].size,
+ sizeof(xfs_attr_leaf_entry_t));
}
}
ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index d98ba57ef01a..c167bdaafc50 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -793,6 +793,8 @@ xfs_bmap_extents_to_btree(
*logflagsp = 0;
if ((error = xfs_alloc_vextent(&args))) {
xfs_iroot_realloc(ip, -1, whichfork);
+ ASSERT(ifp->if_broot == NULL);
+ XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index adbc1f59969a..d8cdab4bfd30 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -135,6 +135,46 @@ xfs_inode_free(
}
/*
+ * If we are allocating a new inode, then check what was returned is
+ * actually a free, empty inode. If we are not allocating an inode,
+ * then check we didn't find a free inode.
+ *
+ * Returns:
+ * 0 if the inode free state matches the lookup context
+ * -ENOENT if the inode is free and we are not allocating
+ * -EFSCORRUPTED if there is any state mismatch at all
+ */
+static int
+xfs_iget_check_free_state(
+ struct xfs_inode *ip,
+ int flags)
+{
+ if (flags & XFS_IGET_CREATE) {
+ /* should be a free inode */
+ if (ip->i_d.di_mode != 0) {
+ xfs_warn(ip->i_mount,
+"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)",
+ ip->i_ino, ip->i_d.di_mode);
+ return -EFSCORRUPTED;
+ }
+
+ if (ip->i_d.di_nblocks != 0) {
+ xfs_warn(ip->i_mount,
+"Corruption detected! Free inode 0x%llx has blocks allocated!",
+ ip->i_ino);
+ return -EFSCORRUPTED;
+ }
+ return 0;
+ }
+
+ /* should be an allocated inode */
+ if (ip->i_d.di_mode == 0)
+ return -ENOENT;
+
+ return 0;
+}
+
+/*
* Check the validity of the inode we just found it the cache
*/
static int
@@ -183,12 +223,12 @@ xfs_iget_cache_hit(
}
/*
- * If lookup is racing with unlink return an error immediately.
+ * Check the inode free state is valid. This also detects lookup
+ * racing with unlinks.
*/
- if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
- error = -ENOENT;
+ error = xfs_iget_check_free_state(ip, flags);
+ if (error)
goto out_error;
- }
/*
* If IRECLAIMABLE is set, we've torn down the VFS inode already.
@@ -298,10 +338,14 @@ xfs_iget_cache_miss(
trace_xfs_iget_miss(ip);
- if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
- error = -ENOENT;
+
+ /*
+ * Check the inode free state is valid. This also detects lookup
+ * racing with unlinks.
+ */
+ error = xfs_iget_check_free_state(ip, flags);
+ if (error)
goto out_destroy;
- }
/*
* Preload the radix tree so we can insert safely under the
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 245268a0cdf0..d70a004378d8 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -770,7 +770,7 @@ xfs_setattr_size(
ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
ASSERT(S_ISREG(ip->i_d.di_mode));
ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
- ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+ ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0);
oldsize = inode->i_size;
newsize = iattr->ia_size;
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index dc6221942b85..ab66ea0a72bf 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -162,7 +162,7 @@ xfs_fs_map_blocks(
goto out_unlock;
error = invalidate_inode_pages2(inode->i_mapping);
if (WARN_ON_ONCE(error))
- return error;
+ goto out_unlock;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 919b6544b61a..acadeaf72674 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -256,6 +256,9 @@ xfs_rtallocate_extent_block(
end = XFS_BLOCKTOBIT(mp, bbno + 1) - 1;
i <= end;
i++) {
+ /* Make sure we don't scan off the end of the rt volume. */
+ maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i;
+
/*
* See if there's a free extent of maxlen starting at i.
* If it's not so then next will contain the first non-free.
@@ -447,6 +450,14 @@ xfs_rtallocate_extent_near(
*/
if (bno >= mp->m_sb.sb_rextents)
bno = mp->m_sb.sb_rextents - 1;
+
+ /* Make sure we don't run off the end of the rt volume. */
+ maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno;
+ if (maxlen < minlen) {
+ *rtblock = NULLRTBLOCK;
+ return 0;
+ }
+
/*
* Try the exact allocation first.
*/
@@ -1006,10 +1017,13 @@ xfs_growfs_rt(
xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
/*
- * Update the bitmap inode's size.
+ * Update the bitmap inode's size ondisk and incore. We need
+ * to update the incore size so that inode inactivation won't
+ * punch what it thinks are "posteof" blocks.
*/
mp->m_rbmip->i_d.di_size =
nsbp->sb_rbmblocks * nsbp->sb_blocksize;
+ i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
/*
* Get the summary inode into the transaction.
@@ -1017,9 +1031,12 @@ xfs_growfs_rt(
xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
/*
- * Update the summary inode's size.
+ * Update the summary inode's size. We need to update the
+ * incore size so that inode inactivation won't punch what it
+ * thinks are "posteof" blocks.
*/
mp->m_rsumip->i_d.di_size = nmp->m_rsumsize;
+ i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE);
/*
* Copy summary data from old to new sizes.
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index be692e59938d..c457b010c623 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -44,9 +44,11 @@ xfs_sysfs_init(
struct xfs_kobj *parent_kobj,
const char *name)
{
+ struct kobject *parent;
+
+ parent = parent_kobj ? &parent_kobj->kobject : NULL;
init_completion(&kobj->complete);
- return kobject_init_and_add(&kobj->kobject, ktype,
- &parent_kobj->kobject, "%s", name);
+ return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name);
}
static inline void
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index ce78534a047e..bb8de2dddabe 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -662,7 +662,7 @@ xfs_trans_dqresv(
}
}
if (ninos > 0) {
- total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
+ total_count = dqp->q_res_icount + ninos;
timer = be32_to_cpu(dqp->q_core.d_itimer);
warns = be16_to_cpu(dqp->q_core.d_iwarns);
warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;