From 33b8f7c2479dfcbc5c27174e44b5f659d9f33c70 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 Jul 2011 14:34:39 +0200 Subject: xfs: improve sync behaviour in the face of aggressive dirtying The following script from Wu Fengguang shows very bad behaviour in XFS when aggressively dirtying data during a sync on XFS, with sync times up to almost 10 times as long as ext4. A large part of the issue is that XFS writes data out itself two times in the ->sync_fs method, overriding the livelock protection in the core writeback code, and another issue is the lock-less xfs_ioend_wait call, which doesn't prevent new ioend from being queue up while waiting for the count to reach zero. This patch removes the XFS-internal sync calls and relies on the VFS to do it's work just like all other filesystems do. Note that the i_iocount wait which is rather suboptimal is simply removed here. We already do it in ->write_inode, which keeps the current supoptimal behaviour. We'll eventually need to remove that as well, but that's material for a separate commit. ------------------------------ snip ------------------------------ #!/bin/sh umount /dev/sda7 mkfs.xfs -f /dev/sda7 # mkfs.ext4 /dev/sda7 # mkfs.btrfs /dev/sda7 mount /dev/sda7 /fs echo $((50<<20)) > /proc/sys/vm/dirty_bytes pid= for i in `seq 10` do dd if=/dev/zero of=/fs/zero-$i bs=1M count=1000 & pid="$pid $!" done sleep 1 tic=$(date +'%s') sync tac=$(date +'%s') echo echo sync time: $((tac-tic)) egrep '(Dirty|Writeback|NFS_Unstable)' /proc/meminfo pidof dd > /dev/null && { kill -9 $pid; echo sync NOT livelocked; } ------------------------------ snip ------------------------------ Signed-off-by: Christoph Hellwig Reported-by: Wu Fengguang Reviewed-by: Alex Elder Reviewed-by: Dave Chinner --- fs/xfs/linux-2.6/xfs_sync.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 8ecad5ff9f9b..f54e8ee1ed62 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -359,14 +359,12 @@ xfs_quiesce_data( { int error, error2 = 0; - /* push non-blocking */ - xfs_sync_data(mp, 0); xfs_qm_sync(mp, SYNC_TRYLOCK); - - /* push and block till complete */ - xfs_sync_data(mp, SYNC_WAIT); xfs_qm_sync(mp, SYNC_WAIT); + /* force out the newly dirtied log buffers */ + xfs_log_force(mp, XFS_LOG_SYNC); + /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp); -- cgit v1.2.3 From adab0f67d1cdaf468bbc311bce4d61f17626a536 Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Wed, 29 Jun 2011 22:10:14 +0000 Subject: xfs: Remove the second parameter to xfs_sb_count() Remove the second parameter to xfs_sb_count() since all callers of the function set them. Also, fix the header comment regarding it being called periodically. Signed-off-by: Chandra Seetharaman Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index f54e8ee1ed62..5cc158e52d4c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -434,7 +434,7 @@ xfs_quiesce_attr( WARN_ON(atomic_read(&mp->m_active_trans) != 0); /* Push the superblock and write an unmount record */ - error = xfs_log_sbcount(mp, 1); + error = xfs_log_sbcount(mp); if (error) xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " "Frozen image may not be consistent."); -- cgit v1.2.3 From 8daaa83145ef1f0a146680618328dbbd0fa76939 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Jul 2011 14:14:46 +1000 Subject: xfs: make use of new shrinker callout for the inode cache Convert the inode reclaim shrinker to use the new per-sb shrinker operations. This allows much bigger reclaim batches to be used, and allows the XFS inode cache to be shrunk in proportion with the VFS dentry and inode caches. This avoids the problem of the VFS caches being shrunk significantly before the XFS inode cache is shrunk resulting in imbalances in the caches during reclaim. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/xfs/linux-2.6/xfs_sync.c | 71 +++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 44 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.c') diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 8ecad5ff9f9b..9bd7e895a4e2 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -179,6 +179,8 @@ restart: if (error == EFSCORRUPTED) break; + cond_resched(); + } while (nr_found && !done); if (skipped) { @@ -986,6 +988,8 @@ restart: *nr_to_scan -= XFS_LOOKUP_BATCH; + cond_resched(); + } while (nr_found && !done && *nr_to_scan > 0); if (trylock && !done) @@ -1003,7 +1007,7 @@ restart: * ensure that when we get more reclaimers than AGs we block rather * than spin trying to execute reclaim. */ - if (trylock && skipped && *nr_to_scan > 0) { + if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) { trylock = 0; goto restart; } @@ -1021,44 +1025,38 @@ xfs_reclaim_inodes( } /* - * Inode cache shrinker. + * Scan a certain number of inodes for reclaim. * * When called we make sure that there is a background (fast) inode reclaim in - * progress, while we will throttle the speed of reclaim via doiing synchronous + * progress, while we will throttle the speed of reclaim via doing synchronous * reclaim of inodes. That means if we come across dirty inodes, we wait for * them to be cleaned, which we hope will not be very long due to the * background walker having already kicked the IO off on those dirty inodes. */ -static int -xfs_reclaim_inode_shrink( - struct shrinker *shrink, - struct shrink_control *sc) +void +xfs_reclaim_inodes_nr( + struct xfs_mount *mp, + int nr_to_scan) { - struct xfs_mount *mp; - struct xfs_perag *pag; - xfs_agnumber_t ag; - int reclaimable; - int nr_to_scan = sc->nr_to_scan; - gfp_t gfp_mask = sc->gfp_mask; - - mp = container_of(shrink, struct xfs_mount, m_inode_shrink); - if (nr_to_scan) { - /* kick background reclaimer and push the AIL */ - xfs_syncd_queue_reclaim(mp); - xfs_ail_push_all(mp->m_ail); + /* kick background reclaimer and push the AIL */ + xfs_syncd_queue_reclaim(mp); + xfs_ail_push_all(mp->m_ail); - if (!(gfp_mask & __GFP_FS)) - return -1; + xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); +} - xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, - &nr_to_scan); - /* terminate if we don't exhaust the scan */ - if (nr_to_scan > 0) - return -1; - } +/* + * Return the number of reclaimable inodes in the filesystem for + * the shrinker to determine how much to reclaim. + */ +int +xfs_reclaim_inodes_count( + struct xfs_mount *mp) +{ + struct xfs_perag *pag; + xfs_agnumber_t ag = 0; + int reclaimable = 0; - reclaimable = 0; - ag = 0; while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { ag = pag->pag_agno + 1; reclaimable += pag->pag_ici_reclaimable; @@ -1067,18 +1065,3 @@ xfs_reclaim_inode_shrink( return reclaimable; } -void -xfs_inode_shrinker_register( - struct xfs_mount *mp) -{ - mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink; - mp->m_inode_shrink.seeks = DEFAULT_SEEKS; - register_shrinker(&mp->m_inode_shrink); -} - -void -xfs_inode_shrinker_unregister( - struct xfs_mount *mp) -{ - unregister_shrinker(&mp->m_inode_shrink); -} -- cgit v1.2.3