diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/dir.c | 19 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 12 | ||||
-rw-r--r-- | fs/ext4/extents.c | 4 | ||||
-rw-r--r-- | fs/ext4/file.c | 4 | ||||
-rw-r--r-- | fs/ext4/inode.c | 73 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 46 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 3 | ||||
-rw-r--r-- | fs/ext4/namei.c | 58 | ||||
-rw-r--r-- | fs/ext4/super.c | 21 |
9 files changed, 187 insertions, 53 deletions
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index dc676714454a..446b6c375b6f 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -107,7 +107,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct buffer_head *bh = NULL; - int dir_has_error = 0; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); if (ext4_encrypted_inode(inode)) { @@ -143,8 +142,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) return err; } - offset = ctx->pos & (sb->s_blocksize - 1); - while (ctx->pos < inode->i_size) { struct ext4_map_blocks map; @@ -153,9 +150,18 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) goto errout; } cond_resched(); + offset = ctx->pos & (sb->s_blocksize - 1); map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); map.m_len = 1; err = ext4_map_blocks(NULL, inode, &map, 0); + if (err == 0) { + /* m_len should never be zero but let's avoid + * an infinite loop if it somehow is */ + if (map.m_len == 0) + map.m_len = 1; + ctx->pos += map.m_len * sb->s_blocksize; + continue; + } if (err > 0) { pgoff_t index = map.m_pblk >> (PAGE_SHIFT - inode->i_blkbits); @@ -174,13 +180,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) } if (!bh) { - if (!dir_has_error) { - EXT4_ERROR_FILE(file, 0, - "directory contains a " - "hole at offset %llu", - (unsigned long long) ctx->pos); - dir_has_error = 1; - } /* corrupt size? Maybe no more blocks to read */ if (ctx->pos > inode->i_blocks << 9) break; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 1437f62d068c..116401578401 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -364,20 +364,20 @@ static inline int ext4_journal_force_commit(journal_t *journal) } static inline int ext4_jbd2_inode_add_write(handle_t *handle, - struct inode *inode) + struct inode *inode, loff_t start_byte, loff_t length) { if (ext4_handle_valid(handle)) - return jbd2_journal_inode_add_write(handle, - EXT4_I(inode)->jinode); + return jbd2_journal_inode_ranged_write(handle, + EXT4_I(inode)->jinode, start_byte, length); return 0; } static inline int ext4_jbd2_inode_add_wait(handle_t *handle, - struct inode *inode) + struct inode *inode, loff_t start_byte, loff_t length) { if (ext4_handle_valid(handle)) - return jbd2_journal_inode_add_wait(handle, - EXT4_I(inode)->jinode); + return jbd2_journal_inode_ranged_wait(handle, + EXT4_I(inode)->jinode, start_byte, length); return 0; } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 01f44364c547..20d68554680f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3756,8 +3756,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, * illegal. */ if (ee_block != map->m_lblk || ee_len > map->m_len) { -#ifdef EXT4_DEBUG - ext4_warning("Inode (%ld) finished: extent logical block %llu," +#ifdef CONFIG_EXT4_DEBUG + ext4_warning(inode->i_sb, "Inode (%ld) finished: extent logical block %llu," " len %u; IO logical block %llu, len %u", inode->i_ino, (unsigned long long)ee_block, ee_len, (unsigned long long)map->m_lblk, map->m_len); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 44966b272216..4ede0af9d6fe 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -163,6 +163,10 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) ret = generic_write_checks(iocb, from); if (ret <= 0) return ret; + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + /* * If we have encountered a bitmap-format file, the size limit * is smaller than s_maxbytes, which is for extent-mapped files. diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 862766a1b080..11bc4c69bf16 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -195,7 +195,12 @@ void ext4_evict_inode(struct inode *inode) { handle_t *handle; int err; - int extra_credits = 3; + /* + * Credits for final inode cleanup and freeing: + * sb + inode (ext4_orphan_del()), block bitmap, group descriptor + * (xattr block freeing), bitmap, group descriptor (inode freeing) + */ + int extra_credits = 6; struct ext4_xattr_inode_array *ea_inode_array = NULL; trace_ext4_evict_inode(inode); @@ -251,8 +256,12 @@ void ext4_evict_inode(struct inode *inode) if (!IS_NOQUOTA(inode)) extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); + /* + * Block bitmap, group descriptor, and inode are accounted in both + * ext4_blocks_for_truncate() and extra_credits. So subtract 3. + */ handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, - ext4_blocks_for_truncate(inode)+extra_credits); + ext4_blocks_for_truncate(inode) + extra_credits - 3); if (IS_ERR(handle)) { ext4_std_error(inode->i_sb, PTR_ERR(handle)); /* @@ -728,10 +737,16 @@ out_sem: !(flags & EXT4_GET_BLOCKS_ZERO) && !ext4_is_quota_file(inode) && ext4_should_order_data(inode)) { + loff_t start_byte = + (loff_t)map->m_lblk << inode->i_blkbits; + loff_t length = (loff_t)map->m_len << inode->i_blkbits; + if (flags & EXT4_GET_BLOCKS_IO_SUBMIT) - ret = ext4_jbd2_inode_add_wait(handle, inode); + ret = ext4_jbd2_inode_add_wait(handle, inode, + start_byte, length); else - ret = ext4_jbd2_inode_add_write(handle, inode); + ret = ext4_jbd2_inode_add_write(handle, inode, + start_byte, length); if (ret) return ret; } @@ -4004,7 +4019,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, err = 0; mark_buffer_dirty(bh); if (ext4_should_order_data(inode)) - err = ext4_jbd2_inode_add_write(handle, inode); + err = ext4_jbd2_inode_add_write(handle, inode, from, + length); } unlock: @@ -4170,6 +4186,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) trace_ext4_punch_hole(inode, offset, length, 0); + ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + if (ext4_has_inline_data(inode)) { + down_write(&EXT4_I(inode)->i_mmap_sem); + ret = ext4_convert_inline_data(inode); + up_write(&EXT4_I(inode)->i_mmap_sem); + if (ret) + return ret; + } + /* * Write out all dirty pages to avoid race conditions * Then release them. @@ -5280,11 +5305,15 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) offset = inode->i_size & (PAGE_SIZE - 1); /* - * All buffers in the last page remain valid? Then there's nothing to - * do. We do the check mainly to optimize the common PAGE_SIZE == - * blocksize case + * If the page is fully truncated, we don't need to wait for any commit + * (and we even should not as __ext4_journalled_invalidatepage() may + * strip all buffers from the page but keep the page dirty which can then + * confuse e.g. concurrent ext4_writepage() seeing dirty page without + * buffers). Also we don't need to wait for any commit if all buffers in + * the page remain valid. This is most beneficial for the common case of + * blocksize == PAGESIZE. */ - if (offset > PAGE_SIZE - i_blocksize(inode)) + if (!offset || offset > (PAGE_SIZE - i_blocksize(inode))) return; while (1) { page = find_lock_page(inode->i_mapping, @@ -5341,6 +5370,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + if (unlikely(IS_APPEND(inode) && + (ia_valid & (ATTR_MODE | ATTR_UID | + ATTR_GID | ATTR_TIMES_SET)))) + return -EPERM; + error = setattr_prepare(dentry, attr); if (error) return error; @@ -5727,8 +5764,23 @@ static int __ext4_expand_extra_isize(struct inode *inode, { struct ext4_inode *raw_inode; struct ext4_xattr_ibody_header *header; + unsigned int inode_size = EXT4_INODE_SIZE(inode->i_sb); + struct ext4_inode_info *ei = EXT4_I(inode); int error; + /* this was checked at iget time, but double check for good measure */ + if ((EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > inode_size) || + (ei->i_extra_isize & 3)) { + EXT4_ERROR_INODE(inode, "bad extra_isize %u (inode size %u)", + ei->i_extra_isize, + EXT4_INODE_SIZE(inode->i_sb)); + return -EFSCORRUPTED; + } + if ((new_extra_isize < ei->i_extra_isize) || + (new_extra_isize < 4) || + (new_extra_isize > inode_size - EXT4_GOOD_OLD_INODE_SIZE)) + return -EINVAL; /* Should never happen */ + raw_inode = ext4_raw_inode(iloc); header = IHDR(inode, raw_inode); @@ -6045,6 +6097,9 @@ int ext4_page_mkwrite(struct vm_fault *vmf) get_block_t *get_block; int retries = 0; + if (unlikely(IS_IMMUTABLE(inode))) + return VM_FAULT_SIGBUS; + sb_start_pagefault(inode->i_sb); file_update_time(vma->vm_file); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index d2efc0cb8f31..82e118e9e50b 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -210,6 +210,29 @@ static int uuid_is_zero(__u8 u[16]) } #endif +/* + * If immutable is set and we are not clearing it, we're not allowed to change + * anything else in the inode. Don't error out if we're only trying to set + * immutable on an immutable file. + */ +static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid, + unsigned int flags) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned int oldflags = ei->i_flags; + + if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL)) + return 0; + + if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL)) + return -EPERM; + if (ext4_has_feature_project(inode->i_sb) && + __kprojid_val(ei->i_projid) != new_projid) + return -EPERM; + + return 0; +} + static int ext4_ioctl_setflags(struct inode *inode, unsigned int flags) { @@ -263,6 +286,20 @@ static int ext4_ioctl_setflags(struct inode *inode, goto flags_out; } + /* + * Wait for all pending directio and then flush all the dirty pages + * for this file. The flush marks all the pages readonly, so any + * subsequent attempt to write to the file (particularly mmap pages) + * will come through the filesystem and fail. + */ + if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) && + (flags & EXT4_IMMUTABLE_FL)) { + inode_dio_wait(inode); + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto flags_out; + } + handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); @@ -653,7 +690,11 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return err; inode_lock(inode); - err = ext4_ioctl_setflags(inode, flags); + err = ext4_ioctl_check_immutable(inode, + from_kprojid(&init_user_ns, ei->i_projid), + flags); + if (!err) + err = ext4_ioctl_setflags(inode, flags); inode_unlock(inode); mnt_drop_write_file(filp); return err; @@ -1061,6 +1102,9 @@ resizefs_out: goto out; flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) | (flags & EXT4_FL_XFLAG_VISIBLE); + err = ext4_ioctl_check_immutable(inode, fa.fsx_projid, flags); + if (err) + goto out; err = ext4_ioctl_setflags(inode, flags); if (err) goto out; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index cd8d481e0c48..ef60f2e92da6 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -400,7 +400,8 @@ data_copy: /* Even in case of data=writeback it is reasonable to pin * inode to transaction, to prevent unexpected data loss */ - *err = ext4_jbd2_inode_add_write(handle, orig_inode); + *err = ext4_jbd2_inode_add_write(handle, orig_inode, + (loff_t)orig_page_offset << PAGE_SHIFT, replaced_size); unlock_pages: unlock_page(pagep[0]); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 070660cb5b91..b4e0c270def4 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -80,8 +80,18 @@ static struct buffer_head *ext4_append(handle_t *handle, static int ext4_dx_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent); +/* + * Hints to ext4_read_dirblock regarding whether we expect a directory + * block being read to be an index block, or a block containing + * directory entries (and if the latter, whether it was found via a + * logical block in an htree index block). This is used to control + * what sort of sanity checkinig ext4_read_dirblock() will do on the + * directory block read from the storage device. EITHER will means + * the caller doesn't know what kind of directory block will be read, + * so no specific verification will be done. + */ typedef enum { - EITHER, INDEX, DIRENT + EITHER, INDEX, DIRENT, DIRENT_HTREE } dirblock_type_t; #define ext4_read_dirblock(inode, block, type) \ @@ -107,11 +117,14 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, return bh; } - if (!bh) { + if (!bh && (type == INDEX || type == DIRENT_HTREE)) { ext4_error_inode(inode, func, line, block, - "Directory hole found"); + "Directory hole found for htree %s block", + (type == INDEX) ? "index" : "leaf"); return ERR_PTR(-EFSCORRUPTED); } + if (!bh) + return NULL; dirent = (struct ext4_dir_entry *) bh->b_data; /* Determine whether or not we have an index block */ if (is_dx(inode)) { @@ -978,7 +991,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", (unsigned long)block)); - bh = ext4_read_dirblock(dir, block, DIRENT); + bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) return PTR_ERR(bh); @@ -1508,7 +1521,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, return (struct buffer_head *) frame; do { block = dx_get_block(frame->at); - bh = ext4_read_dirblock(dir, block, DIRENT); + bh = ext4_read_dirblock(dir, block, DIRENT_HTREE); if (IS_ERR(bh)) goto errout; @@ -2088,6 +2101,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0; block < blocks; block++) { bh = ext4_read_dirblock(dir, block, DIRENT); + if (bh == NULL) { + bh = ext4_bread(handle, dir, block, + EXT4_GET_BLOCKS_CREATE); + goto add_to_new_block; + } if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; @@ -2108,6 +2126,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, brelse(bh); } bh = ext4_append(handle, dir, &block); +add_to_new_block: if (IS_ERR(bh)) { retval = PTR_ERR(bh); bh = NULL; @@ -2152,7 +2171,7 @@ again: return PTR_ERR(frame); entries = frame->entries; at = frame->at; - bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); + bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE); if (IS_ERR(bh)) { err = PTR_ERR(bh); bh = NULL; @@ -2274,7 +2293,7 @@ again: dxroot->info.indirect_levels += 1; dxtrace(printk(KERN_DEBUG "Creating %d level index...\n", - info->indirect_levels)); + dxroot->info.indirect_levels)); err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (err) goto journal_error; @@ -2700,7 +2719,10 @@ bool ext4_empty_dir(struct inode *inode) EXT4_ERROR_INODE(inode, "invalid size"); return true; } - bh = ext4_read_dirblock(inode, 0, EITHER); + /* The first directory block must not be a hole, + * so treat it as DIRENT_HTREE + */ + bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) return true; @@ -2722,6 +2744,10 @@ bool ext4_empty_dir(struct inode *inode) brelse(bh); lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); bh = ext4_read_dirblock(inode, lblock, EITHER); + if (bh == NULL) { + offset += sb->s_blocksize; + continue; + } if (IS_ERR(bh)) return true; de = (struct ext4_dir_entry_2 *) bh->b_data; @@ -3039,18 +3065,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); - if (inode->i_nlink == 0) { - ext4_warning_inode(inode, "Deleting file '%.*s' with no links", - dentry->d_name.len, dentry->d_name.name); - set_nlink(inode, 1); - } retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_unlink; dir->i_ctime = dir->i_mtime = current_time(dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); - drop_nlink(inode); + if (inode->i_nlink == 0) + ext4_warning_inode(inode, "Deleting file '%.*s' with no links", + dentry->d_name.len, dentry->d_name.name); + else + drop_nlink(inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); inode->i_ctime = current_time(inode); @@ -3292,7 +3317,10 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, struct buffer_head *bh; if (!ext4_has_inline_data(inode)) { - bh = ext4_read_dirblock(inode, 0, EITHER); + /* The first directory block must not be a hole, so + * treat it as DIRENT_HTREE + */ + bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) { *retval = PTR_ERR(bh); return NULL; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 61d07608577e..1a0a56647974 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3458,12 +3458,15 @@ static void ext4_clamp_want_extra_isize(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + unsigned def_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; - /* determine the minimum size of new large inodes, if present */ - if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && - sbi->s_want_extra_isize == 0) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; + if (sbi->s_inode_size == EXT4_GOOD_OLD_INODE_SIZE) { + sbi->s_want_extra_isize = 0; + return; + } + if (sbi->s_want_extra_isize < 4) { + sbi->s_want_extra_isize = def_extra_isize; if (ext4_has_feature_extra_isize(sb)) { if (sbi->s_want_extra_isize < le16_to_cpu(es->s_want_extra_isize)) @@ -3476,10 +3479,10 @@ static void ext4_clamp_want_extra_isize(struct super_block *sb) } } /* Check if enough inode space is available */ - if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > - sbi->s_inode_size) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; + if ((sbi->s_want_extra_isize > sbi->s_inode_size) || + (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > + sbi->s_inode_size)) { + sbi->s_want_extra_isize = def_extra_isize; ext4_msg(sb, KERN_INFO, "required extra inode space not available"); } |