summaryrefslogtreecommitdiff
path: root/fs/ext4/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/super.c')
-rw-r--r--fs/ext4/super.c144
1 files changed, 110 insertions, 34 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5d6d53578124..94cc84db7c9a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -81,6 +81,7 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
+static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t);
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext2_fs_type = {
@@ -353,10 +354,13 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb);
int error = is_journal_aborted(journal);
- struct ext4_journal_cb_entry *jce, *tmp;
+ struct ext4_journal_cb_entry *jce;
+ BUG_ON(txn->t_state == T_FINISHED);
spin_lock(&sbi->s_md_lock);
- list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
+ while (!list_empty(&txn->t_private_list)) {
+ jce = list_entry(txn->t_private_list.next,
+ struct ext4_journal_cb_entry, jce_list);
list_del_init(&jce->jce_list);
spin_unlock(&sbi->s_md_lock);
jce->jce_func(sb, jce, error);
@@ -699,22 +703,19 @@ fail:
/*
* Release the journal device
*/
-static int ext4_blkdev_put(struct block_device *bdev)
+static void ext4_blkdev_put(struct block_device *bdev)
{
- return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
-static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
+static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
{
struct block_device *bdev;
- int ret = -ENODEV;
-
bdev = sbi->journal_bdev;
if (bdev) {
- ret = ext4_blkdev_put(bdev);
+ ext4_blkdev_put(bdev);
sbi->journal_bdev = NULL;
}
- return ret;
}
static inline struct inode *orphan_list_entry(struct list_head *l)
@@ -1802,7 +1803,7 @@ static int options_seq_show(struct seq_file *seq, void *offset)
static int options_open_fs(struct inode *inode, struct file *file)
{
- return single_open(file, options_seq_show, PDE(inode)->data);
+ return single_open(file, options_seq_show, PDE_DATA(inode));
}
static const struct file_operations ext4_seq_options_fops = {
@@ -1948,16 +1949,16 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
if ((sbi->s_es->s_feature_ro_compat &
cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
/* Use new metadata_csum algorithm */
- __u16 old_csum;
+ __le16 save_csum;
__u32 csum32;
- old_csum = gdp->bg_checksum;
+ save_csum = gdp->bg_checksum;
gdp->bg_checksum = 0;
csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
sizeof(le_group));
csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
sbi->s_desc_size);
- gdp->bg_checksum = old_csum;
+ gdp->bg_checksum = save_csum;
crc = csum32 & 0xFFFF;
goto out;
@@ -2379,17 +2380,15 @@ struct ext4_attr {
int offset;
};
-static int parse_strtoul(const char *buf,
- unsigned long max, unsigned long *value)
+static int parse_strtoull(const char *buf,
+ unsigned long long max, unsigned long long *value)
{
- char *endp;
-
- *value = simple_strtoul(skip_spaces(buf), &endp, 0);
- endp = skip_spaces(endp);
- if (*endp || *value > max)
- return -EINVAL;
+ int ret;
- return 0;
+ ret = kstrtoull(skip_spaces(buf), 0, value);
+ if (!ret && *value > max)
+ ret = -EINVAL;
+ return ret;
}
static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
@@ -2431,11 +2430,13 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
const char *buf, size_t count)
{
unsigned long t;
+ int ret;
- if (parse_strtoul(buf, 0x40000000, &t))
- return -EINVAL;
+ ret = kstrtoul(skip_spaces(buf), 0, &t);
+ if (ret)
+ return ret;
- if (t && !is_power_of_2(t))
+ if (t && (!is_power_of_2(t) || t > 0x40000000))
return -EINVAL;
sbi->s_inode_readahead_blks = t;
@@ -2456,13 +2457,36 @@ static ssize_t sbi_ui_store(struct ext4_attr *a,
{
unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
unsigned long t;
+ int ret;
- if (parse_strtoul(buf, 0xffffffff, &t))
- return -EINVAL;
+ ret = kstrtoul(skip_spaces(buf), 0, &t);
+ if (ret)
+ return ret;
*ui = t;
return count;
}
+static ssize_t reserved_clusters_show(struct ext4_attr *a,
+ struct ext4_sb_info *sbi, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long) atomic64_read(&sbi->s_resv_clusters));
+}
+
+static ssize_t reserved_clusters_store(struct ext4_attr *a,
+ struct ext4_sb_info *sbi,
+ const char *buf, size_t count)
+{
+ unsigned long long val;
+ int ret;
+
+ if (parse_strtoull(buf, -1ULL, &val))
+ return -EINVAL;
+ ret = ext4_reserve_clusters(sbi, val);
+
+ return ret ? ret : count;
+}
+
static ssize_t trigger_test_error(struct ext4_attr *a,
struct ext4_sb_info *sbi,
const char *buf, size_t count)
@@ -2500,6 +2524,7 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
EXT4_RO_ATTR(delayed_allocation_blocks);
EXT4_RO_ATTR(session_write_kbytes);
EXT4_RO_ATTR(lifetime_write_kbytes);
+EXT4_RW_ATTR(reserved_clusters);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
inode_readahead_blks_store, s_inode_readahead_blks);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
@@ -2517,6 +2542,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(delayed_allocation_blocks),
ATTR_LIST(session_write_kbytes),
ATTR_LIST(lifetime_write_kbytes),
+ ATTR_LIST(reserved_clusters),
ATTR_LIST(inode_readahead_blks),
ATTR_LIST(inode_goal),
ATTR_LIST(mb_stats),
@@ -3192,6 +3218,40 @@ int ext4_calculate_overhead(struct super_block *sb)
return 0;
}
+
+static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
+{
+ ext4_fsblk_t resv_clusters;
+
+ /*
+ * By default we reserve 2% or 4096 clusters, whichever is smaller.
+ * This should cover the situations where we can not afford to run
+ * out of space like for example punch hole, or converting
+ * uninitialized extents in delalloc path. In most cases such
+ * allocation would require 1, or 2 blocks, higher numbers are
+ * very rare.
+ */
+ resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
+
+ do_div(resv_clusters, 50);
+ resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
+
+ return resv_clusters;
+}
+
+
+static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count)
+{
+ ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >>
+ sbi->s_cluster_bits;
+
+ if (count >= clusters)
+ return -EINVAL;
+
+ atomic64_set(&sbi->s_resv_clusters, count);
+ return 0;
+}
+
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
char *orig_data = kstrdup(data, GFP_KERNEL);
@@ -3526,6 +3586,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
+ /* Do we have standard group size of blocksize * 8 blocks ? */
+ if (sbi->s_blocks_per_group == blocksize << 3)
+ set_opt2(sb, STD_GROUP_SIZE);
+
for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
sbi->s_def_hash_version = es->s_def_hash_version;
@@ -3698,6 +3762,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_err_report.function = print_daily_error_info;
sbi->s_err_report.data = (unsigned long) sb;
+ /* Register extent status tree shrinker */
+ ext4_es_register_shrinker(sb);
+
err = percpu_counter_init(&sbi->s_freeclusters_counter,
ext4_count_free_clusters(sb));
if (!err) {
@@ -3723,9 +3790,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_max_writeback_mb_bump = 128;
sbi->s_extent_max_zeroout_kb = 32;
- /* Register extent status tree shrinker */
- ext4_es_register_shrinker(sb);
-
/*
* set up enough so that it can read an inode
*/
@@ -3911,6 +3975,13 @@ no_journal:
"available");
}
+ err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
+ "reserved pool", ext4_calculate_resv_clusters(sbi));
+ goto failed_mount4a;
+ }
+
err = ext4_setup_system_zone(sb);
if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize system "
@@ -4010,6 +4081,7 @@ failed_mount_wq:
sbi->s_journal = NULL;
}
failed_mount3:
+ ext4_es_unregister_shrinker(sb);
del_timer(&sbi->s_err_report);
if (sbi->s_flex_groups)
ext4_kvfree(sbi->s_flex_groups);
@@ -4177,7 +4249,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
goto out_bdev;
}
journal->j_private = sb;
- ll_rw_block(READ, 1, &journal->j_sb_buffer);
+ ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
wait_on_buffer(journal->j_sb_buffer);
if (!buffer_uptodate(journal->j_sb_buffer)) {
ext4_msg(sb, KERN_ERR, "I/O error on journal device");
@@ -4742,9 +4814,10 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
struct super_block *sb = dentry->d_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
- ext4_fsblk_t overhead = 0;
+ ext4_fsblk_t overhead = 0, resv_blocks;
u64 fsid;
s64 bfree;
+ resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
if (!test_opt(sb, MINIX_DF))
overhead = sbi->s_overhead;
@@ -4756,8 +4829,9 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
/* prevent underflow in case that few free space is available */
buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
- buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
- if (buf->f_bfree < ext4_r_blocks_count(es))
+ buf->f_bavail = buf->f_bfree -
+ (ext4_r_blocks_count(es) + resv_blocks);
+ if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
buf->f_bavail = 0;
buf->f_files = le32_to_cpu(es->s_inodes_count);
buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
@@ -4945,6 +5019,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
return PTR_ERR(qf_inode);
}
+ /* Don't account quota for quota files to avoid recursion */
+ qf_inode->i_flags |= S_NOQUOTA;
err = dquot_enable(qf_inode, type, format_id, flags);
iput(qf_inode);