From f36f21ecca9ee688301174e5f2e0827827a7a7ff Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 12 May 2008 14:02:33 -0700 Subject: Fix misuses of bdevname() bdevname() fills the buffer that it is given as a parameter, so calling strcpy() or snprintf() on the returned value is redundant (and probably not guaranteed to work - I don't think strcpy and snprintf support overlapping buffers.) Signed-off-by: Jean Delvare Cc: Stephen Tweedie Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/mballoc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index fbec2ef93797..b128bdc0f55c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2639,8 +2639,7 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb) struct proc_dir_entry *proc; char devname[64]; - snprintf(devname, sizeof(devname) - 1, "%s", - bdevname(sb->s_bdev, devname)); + bdevname(sb->s_bdev, devname); sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); @@ -2674,8 +2673,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) if (sbi->s_mb_proc == NULL) return -EINVAL; - snprintf(devname, sizeof(devname) - 1, "%s", - bdevname(sb->s_bdev, devname)); + bdevname(sb->s_bdev, devname); remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); -- cgit v1.2.3 From 1930479c4b6bbcb6f164a5b3498e0d98329967f4 Mon Sep 17 00:00:00 2001 From: Valerie Clement Date: Tue, 13 May 2008 19:31:14 -0400 Subject: ext4: mballoc fix mb_normalize_request algorithm for 1KB block size filesystems In case of inode preallocation, the number of blocks to allocate depends on the file size and it is calculated in ext4_mb_normalize_request(). 
Each group in the filesystem is then checked to find one that can be used for allocation; this is done in ext4_mb_good_group(). When a file bigger than 4MB is created, the requested number of blocks to preallocate, calculated by ext4_mb_normalize_request(), is 4096. However, for a filesystem with 1KB block size, the maximum size of the block buddies used by the multiblock allocator is 2048, so none of the groups in the filesystem satisfies the search criteria in ext4_mb_good_group(). Scanning all the filesystem groups impacts performance. This was demonstrated by using a freshly created, 70GB, 1k block filesystem, with caches dropped right before the test via /proc/sys/vm/drop_caches, and with the filesystem mounted with nodelalloc and with nodelalloc,nomballoc. Writing an 8 megabyte file using "dd if=/dev/zero of=/mnt/test/fo bs=8k count=1k conv=fsync" took 35.5091 seconds (236kB/s) with nodelalloc, and 0.233754 seconds (35.9 MB/s) with the nodelalloc,nomballoc options. With a 1TB partition, it took several minutes to write 8MB! This patch modifies the algorithm in ext4_mb_normalize_request() to calculate the number of blocks to allocate by taking into account the maximum size of the free block chunks handled by the multiblock allocator. It has also been tested for filesystems with 2KB and 4KB block sizes to ensure that those cases don't regress. 
Reviewed-by: Aneesh Kumar K.V Signed-off-by: Valerie Clement Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b128bdc0f55c..1d7fde994521 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2880,12 +2880,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, if (size < i_size_read(ac->ac_inode)) size = i_size_read(ac->ac_inode); - /* max available blocks in a free group */ - max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1 - - EXT4_SB(ac->ac_sb)->s_itb_per_group; + /* max size of free chunks */ + max = 2 << bsbits; -#define NRL_CHECK_SIZE(req, size, max,bits) \ - (req <= (size) || max <= ((size) >> bits)) +#define NRL_CHECK_SIZE(req, size, max, chunk_size) \ + (req <= (size) || max <= (chunk_size)) /* first, try to predict filesize */ /* XXX: should this table be tunable? */ @@ -2904,16 +2903,16 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, size = 512 * 1024; } else if (size <= 1024 * 1024) { size = 1024 * 1024; - } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) { + } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) { start_off = ((loff_t)ac->ac_o_ex.fe_logical >> - (20 - bsbits)) << 20; - size = 1024 * 1024; - } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) { + (21 - bsbits)) << 21; + size = 2 * 1024 * 1024; + } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) { start_off = ((loff_t)ac->ac_o_ex.fe_logical >> (22 - bsbits)) << 22; size = 4 * 1024 * 1024; } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len, - (8<<20)>>bsbits, max, bsbits)) { + (8<<20)>>bsbits, max, 8 * 1024)) { start_off = ((loff_t)ac->ac_o_ex.fe_logical >> (23 - bsbits)) << 23; size = 8 * 1024 * 1024; -- cgit v1.2.3 From 519deca0496a4df07d15acf3181ca5d573bffdec Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 
15 May 2008 14:43:20 -0400 Subject: ext4: Retry block allocation if new blocks are allocated from system zone. If the block allocator gets blocks out of system zone ext4 calls ext4_error. But if the file system is mounted with errors=continue retry block allocation. We need to mark the system zone blocks as in use to make sure retry don't pick them again System zone is the block range mapping block bitmap, inode bitmap and inode table. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1d7fde994521..873ad9b3418c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2736,7 +2736,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, struct ext4_sb_info *sbi; struct super_block *sb; ext4_fsblk_t block; - int err; + int err, len; BUG_ON(ac->ac_status != AC_STATUS_FOUND); BUG_ON(ac->ac_b_ex.fe_len <= 0); @@ -2770,14 +2770,27 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, + ac->ac_b_ex.fe_start + le32_to_cpu(es->s_first_data_block); - if (block == ext4_block_bitmap(sb, gdp) || - block == ext4_inode_bitmap(sb, gdp) || - in_range(block, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) { - + len = ac->ac_b_ex.fe_len; + if (in_range(ext4_block_bitmap(sb, gdp), block, len) || + in_range(ext4_inode_bitmap(sb, gdp), block, len) || + in_range(block, ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group) || + in_range(block + len - 1, ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group)) { ext4_error(sb, __func__, "Allocating block in system zone - block = %llu", block); + /* File system mounted not to panic on error + * Fix the bitmap and repeat the block allocation + * We leak some of the blocks here. 
+ */ + mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), + bitmap_bh->b_data, ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len); + err = ext4_journal_dirty_metadata(handle, bitmap_bh); + if (!err) + err = -EAGAIN; + goto out_err; } #ifdef AGGRESSIVE_CHECK { @@ -4032,7 +4045,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, ac->ac_op = EXT4_MB_HISTORY_ALLOC; ext4_mb_normalize_request(ac, ar); - repeat: /* allocate space in core */ ext4_mb_regular_allocator(ac); @@ -4046,10 +4058,21 @@ repeat: } if (likely(ac->ac_status == AC_STATUS_FOUND)) { - ext4_mb_mark_diskspace_used(ac, handle); - *errp = 0; - block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); - ar->len = ac->ac_b_ex.fe_len; + *errp = ext4_mb_mark_diskspace_used(ac, handle); + if (*errp == -EAGAIN) { + ac->ac_b_ex.fe_group = 0; + ac->ac_b_ex.fe_start = 0; + ac->ac_b_ex.fe_len = 0; + ac->ac_status = AC_STATUS_CONTINUE; + goto repeat; + } else if (*errp) { + ac->ac_b_ex.fe_len = 0; + ar->len = 0; + ext4_mb_show_ac(ac); + } else { + block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); + ar->len = ac->ac_b_ex.fe_len; + } } else { freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); if (freed) @@ -4236,6 +4259,8 @@ do_more: ext4_error(sb, __func__, "Freeing blocks in system zone - " "Block = %lu, count = %lu", block, count); + /* err = 0. ext4_std_error should be a no op */ + goto error_return; } BUFFER_TRACE(bitmap_bh, "getting write access"); -- cgit v1.2.3