diff options
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_acl.c | 3 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 118 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_ioctl.c | 2 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_iops.c | 6 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 65 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 285 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.h | 8 |
7 files changed, 336 insertions, 151 deletions
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index b23a54506446..4c7b14503716 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -250,8 +250,9 @@ xfs_set_mode(struct inode *inode, mode_t mode) if (mode != inode->i_mode) { struct iattr iattr; - iattr.ia_valid = ATTR_MODE; + iattr.ia_valid = ATTR_MODE | ATTR_CTIME; iattr.ia_mode = mode; + iattr.ia_ctime = current_fs_time(inode->i_sb); error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); } diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index c2e30eea74dc..7263002fac64 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -204,14 +204,17 @@ xfs_ioend_new_eof( } /* - * Update on-disk file size now that data has been written to disk. - * The current in-memory file size is i_size. If a write is beyond - * eof i_new_size will be the intended file size until i_size is - * updated. If this write does not extend all the way to the valid - * file size then restrict this update to the end of the write. + * Update on-disk file size now that data has been written to disk. The + * current in-memory file size is i_size. If a write is beyond eof i_new_size + * will be the intended file size until i_size is updated. If this write does + * not extend all the way to the valid file size then restrict this update to + * the end of the write. + * + * This function does not block as blocking on the inode lock in IO completion + * can lead to IO completion order dependency deadlocks.. If it can't get the + * inode ilock it will return EAGAIN. Callers must handle this. */ - -STATIC void +STATIC int xfs_setfilesize( xfs_ioend_t *ioend) { @@ -222,9 +225,11 @@ xfs_setfilesize( ASSERT(ioend->io_type != IOMAP_READ); if (unlikely(ioend->io_error)) - return; + return 0; + + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) + return EAGAIN; - xfs_ilock(ip, XFS_ILOCK_EXCL); isize = xfs_ioend_new_eof(ioend); if (isize) { ip->i_d.di_size = isize; @@ -232,6 +237,28 @@ xfs_setfilesize( } xfs_iunlock(ip, XFS_ILOCK_EXCL); + return 0; +} + +/* + * Schedule IO completion handling on a xfsdatad if this was + * the final hold on this ioend. If we are asked to wait, + * flush the workqueue. + */ +STATIC void +xfs_finish_ioend( + xfs_ioend_t *ioend, + int wait) +{ + if (atomic_dec_and_test(&ioend->io_remaining)) { + struct workqueue_struct *wq; + + wq = (ioend->io_type == IOMAP_UNWRITTEN) ? + xfsconvertd_workqueue : xfsdatad_workqueue; + queue_work(wq, &ioend->io_work); + if (wait) + flush_workqueue(wq); + } } /* @@ -243,9 +270,23 @@ xfs_end_bio_delalloc( { xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); + int error; - xfs_setfilesize(ioend); - xfs_destroy_ioend(ioend); + /* + * If we didn't complete processing of the ioend, requeue it to the + * tail of the workqueue for another attempt later. Otherwise destroy + * it. + */ + error = xfs_setfilesize(ioend); + if (error == EAGAIN) { + atomic_inc(&ioend->io_remaining); + xfs_finish_ioend(ioend, 0); + /* ensure we don't spin on blocked ioends */ + delay(1); + } else { + ASSERT(!error); + xfs_destroy_ioend(ioend); + } } /* @@ -257,9 +298,23 @@ xfs_end_bio_written( { xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); + int error; - xfs_setfilesize(ioend); - xfs_destroy_ioend(ioend); + /* + * If we didn't complete processing of the ioend, requeue it to the + * tail of the workqueue for another attempt later. Otherwise destroy + * it. + */ + error = xfs_setfilesize(ioend); + if (error == EAGAIN) { + atomic_inc(&ioend->io_remaining); + xfs_finish_ioend(ioend, 0); + /* ensure we don't spin on blocked ioends */ + delay(1); + } else { + ASSERT(!error); + xfs_destroy_ioend(ioend); + } } /* @@ -279,13 +334,25 @@ xfs_end_bio_unwritten( size_t size = ioend->io_size; if (likely(!ioend->io_error)) { + int error; if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - int error; error = xfs_iomap_write_unwritten(ip, offset, size); if (error) ioend->io_error = error; } - xfs_setfilesize(ioend); + /* + * If we didn't complete processing of the ioend, requeue it to the + * tail of the workqueue for another attempt later. Otherwise destroy + * it. + */ + error = xfs_setfilesize(ioend); + if (error == EAGAIN) { + atomic_inc(&ioend->io_remaining); + xfs_finish_ioend(ioend, 0); + /* ensure we don't spin on blocked ioends */ + delay(1); + return; + } } xfs_destroy_ioend(ioend); } @@ -304,27 +371,6 @@ xfs_end_bio_read( } /* - * Schedule IO completion handling on a xfsdatad if this was - * the final hold on this ioend. If we are asked to wait, - * flush the workqueue. - */ -STATIC void -xfs_finish_ioend( - xfs_ioend_t *ioend, - int wait) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) { - struct workqueue_struct *wq = xfsdatad_workqueue; - if (ioend->io_work.func == xfs_end_bio_unwritten) - wq = xfsconvertd_workqueue; - - queue_work(wq, &ioend->io_work); - if (wait) - flush_workqueue(wq); - } -} - -/* * Allocate and initialise an IO completion structure. * We need to track unwritten extent write completion here initially. * We'll need to extend this for updating the ondisk inode size later diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 5bb523d7f37e..98fe0db674bc 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -789,6 +789,8 @@ xfs_ioc_fsgetxattr( { struct fsxattr fa; + memset(&fa, 0, sizeof(struct fsxattr)); + xfs_ilock(ip, XFS_ILOCK_SHARED); fa.fsx_xflags = xfs_ip2xflags(ip); fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index cd42ef78f6b5..1f3b4b8f7dd4 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -573,8 +573,8 @@ xfs_vn_fallocate( bf.l_len = len; xfs_ilock(ip, XFS_IOLOCK_EXCL); - error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, - 0, XFS_ATTR_NOLOCK); + error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, + 0, XFS_ATTR_NOLOCK); if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) new_size = offset + len; @@ -585,7 +585,7 @@ xfs_vn_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); + error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); } xfs_iunlock(ip, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 18a4b8e11df2..d95bfa27c62a 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -930,13 +930,37 @@ xfs_fs_alloc_inode( */ STATIC void xfs_fs_destroy_inode( - struct inode *inode) + struct inode *inode) { - xfs_inode_t *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(inode); + + xfs_itrace_entry(ip); XFS_STATS_INC(vn_reclaim); - if (xfs_reclaim(ip)) - panic("%s: cannot reclaim 0x%p\n", __func__, inode); + + /* bad inode, get out here ASAP */ + if (is_bad_inode(inode)) + goto out_reclaim; + + xfs_ioend_wait(ip); + + ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); + + /* + * We should never get here with one of the reclaim flags already set. + */ + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); + + /* + * We always use background reclaim here because even if the + * inode is clean, it still may be under IO and hence we have + * to take the flush lock. The background reclaim path handles + * this more efficiently than we can here, so simply let background + * reclaim tear down all inodes. + */ +out_reclaim: + xfs_inode_set_reclaim_tag(ip); } /* @@ -1140,6 +1164,7 @@ xfs_fs_put_super( xfs_unmountfs(mp); xfs_freesb(mp); + xfs_inode_shrinker_unregister(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); xfs_dmops_put(mp); @@ -1299,6 +1324,8 @@ xfs_fs_remount( /* ro -> rw */ if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { + __uint64_t resblks; + mp->m_flags &= ~XFS_MOUNT_RDONLY; if (mp->m_flags & XFS_MOUNT_BARRIER) xfs_mountfs_check_barriers(mp); @@ -1316,11 +1343,37 @@ xfs_fs_remount( } mp->m_update_flags = 0; } + + /* + * Fill out the reserve pool if it is empty. Use the stashed + * value if it is non-zero, otherwise go with the default. + */ + if (mp->m_resblks_save) { + resblks = mp->m_resblks_save; + mp->m_resblks_save = 0; + } else { + resblks = mp->m_sb.sb_dblocks; + do_div(resblks, 20); + resblks = min_t(__uint64_t, resblks, 1024); + } + xfs_reserve_blocks(mp, &resblks, NULL); } /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + /* + * After we have synced the data but before we sync the + * metadata, we need to free up the reserve block pool so that + * the used block count in the superblock on disk is correct at + * the end of the remount. Stash the current reserve pool size + * so that if we get remounted rw, we can return it to the same + * size. + */ + __uint64_t resblks = 0; + xfs_quiesce_data(mp); + mp->m_resblks_save = mp->m_resblks; + xfs_reserve_blocks(mp, &resblks, NULL); xfs_quiesce_attr(mp); mp->m_flags |= XFS_MOUNT_RDONLY; } @@ -1503,6 +1556,8 @@ xfs_fs_fill_super( if (error) goto fail_vnrele; + xfs_inode_shrinker_register(mp); + kfree(mtpt); xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); @@ -1842,6 +1897,7 @@ init_xfs_fs(void) goto out_cleanup_procfs; vfs_initquota(); + xfs_inode_shrinker_init(); error = register_filesystem(&xfs_fs_type); if (error) @@ -1871,6 +1927,7 @@ exit_xfs_fs(void) { vfs_exitquota(); unregister_filesystem(&xfs_fs_type); + xfs_inode_shrinker_destroy(); xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_buf_terminate(); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 961df0a22c78..c82683ad1484 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -64,7 +64,6 @@ xfs_inode_ag_lookup( * as the tree is sparse and a gang lookup walks to find * the number of objects requested. */ - read_lock(&pag->pag_ici_lock); if (tag == XFS_ICI_NO_TAG) { nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void **)&ip, *first_index, 1); @@ -73,7 +72,7 @@ xfs_inode_ag_lookup( (void **)&ip, *first_index, 1, tag); } if (!nr_found) - goto unlock; + return NULL; /* * Update the index for the next lookup. Catch overflows @@ -83,13 +82,8 @@ xfs_inode_ag_lookup( */ *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) - goto unlock; - + return NULL; return ip; - -unlock: - read_unlock(&pag->pag_ici_lock); - return NULL; } STATIC int @@ -99,7 +93,9 @@ xfs_inode_ag_walk( int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), int flags, - int tag) + int tag, + int exclusive, + int *nr_to_scan) { struct xfs_perag *pag = &mp->m_perag[ag]; uint32_t first_index; @@ -113,10 +109,20 @@ restart: int error = 0; xfs_inode_t *ip; + if (exclusive) + write_lock(&pag->pag_ici_lock); + else + read_lock(&pag->pag_ici_lock); ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); - if (!ip) + if (!ip) { + if (exclusive) + write_unlock(&pag->pag_ici_lock); + else + read_unlock(&pag->pag_ici_lock); break; + } + /* execute releases pag->pag_ici_lock */ error = execute(ip, pag, flags); if (error == EAGAIN) { skipped++; @@ -124,13 +130,12 @@ restart: } if (error) last_error = error; - /* - * bail out if the filesystem is corrupted. - */ + + /* bail out if the filesystem is corrupted. */ if (error == EFSCORRUPTED) break; - } while (1); + } while ((*nr_to_scan)--); if (skipped) { delay(1); @@ -147,22 +152,31 @@ xfs_inode_ag_iterator( int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), int flags, - int tag) + int tag, + int exclusive, + int *nr_to_scan) { int error = 0; int last_error = 0; xfs_agnumber_t ag; + int nr; + nr = nr_to_scan ? *nr_to_scan : INT_MAX; for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { if (!mp->m_perag[ag].pag_ici_init) continue; - error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); + error = xfs_inode_ag_walk(mp, ag, execute, flags, tag, + exclusive, &nr); if (error) { last_error = error; if (error == EFSCORRUPTED) break; } + if (nr <= 0) + break; } + if (nr_to_scan) + *nr_to_scan = nr; return XFS_ERROR(last_error); } @@ -173,30 +187,31 @@ xfs_sync_inode_valid( struct xfs_perag *pag) { struct inode *inode = VFS_I(ip); + int error = EFSCORRUPTED; /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - read_unlock(&pag->pag_ici_lock); - return EFSCORRUPTED; - } + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + goto out_unlock; - /* - * If we can't get a reference on the inode, it must be in reclaim. - * Leave it for the reclaim code to flush. Also avoid inodes that - * haven't been fully initialised. - */ - if (!igrab(inode)) { - read_unlock(&pag->pag_ici_lock); - return ENOENT; - } - read_unlock(&pag->pag_ici_lock); + /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ + error = ENOENT; + if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + goto out_unlock; - if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { + /* If we can't grab the inode, it must on it's way to reclaim. */ + if (!igrab(inode)) + goto out_unlock; + + if (is_bad_inode(inode)) { IRELE(ip); - return ENOENT; + goto out_unlock; } - return 0; + /* inode is valid */ + error = 0; +out_unlock: + read_unlock(&pag->pag_ici_lock); + return error; } STATIC int @@ -281,7 +296,7 @@ xfs_sync_data( ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, - XFS_ICI_NO_TAG); + XFS_ICI_NO_TAG, 0, NULL); if (error) return XFS_ERROR(error); @@ -303,7 +318,7 @@ xfs_sync_attr( ASSERT((flags & ~SYNC_WAIT) == 0); return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, - XFS_ICI_NO_TAG); + XFS_ICI_NO_TAG, 0, NULL); } STATIC int @@ -663,35 +678,71 @@ xfs_syncd_stop( kthread_stop(mp->m_sync_task); } -int -xfs_reclaim_inode( - xfs_inode_t *ip, - int locked, - int sync_mode) +void +__xfs_inode_set_reclaim_tag( + struct xfs_perag *pag, + struct xfs_inode *ip) { - xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); + radix_tree_tag_set(&pag->pag_ici_root, + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), + XFS_ICI_RECLAIM_TAG); + pag->pag_ici_reclaimable++; +} + +/* + * We set the inode flag atomically with the radix tree tag. + * Once we get tag lookups on the radix tree, this inode flag + * can go away. + */ +void +xfs_inode_set_reclaim_tag( + xfs_inode_t *ip) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); - /* The hash lock here protects a thread in xfs_iget_core from - * racing with us on linking the inode back with a vnode. - * Once we have the XFS_IRECLAIM flag set it will not touch - * us. - */ write_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); - if (__xfs_iflags_test(ip, XFS_IRECLAIM) || - !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { + __xfs_inode_set_reclaim_tag(pag, ip); + __xfs_iflags_set(ip, XFS_IRECLAIMABLE); + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + xfs_put_perag(mp, pag); +} + +void +__xfs_inode_clear_reclaim_tag( + xfs_mount_t *mp, + xfs_perag_t *pag, + xfs_inode_t *ip) +{ + radix_tree_tag_clear(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + pag->pag_ici_reclaimable--; +} + +STATIC int +xfs_reclaim_inode( + struct xfs_inode *ip, + struct xfs_perag *pag, + int sync_mode) +{ + /* + * The radix tree lock here protects a thread in xfs_iget from racing + * with us starting reclaim on the inode. Once we have the + * XFS_IRECLAIM flag set it will not touch us. + */ + spin_lock(&ip->i_flags_lock); + ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { + /* ignore as it is already under reclaim */ spin_unlock(&ip->i_flags_lock); write_unlock(&pag->pag_ici_lock); - if (locked) { - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } - return -EAGAIN; + return 0; } __xfs_iflags_set(ip, XFS_IRECLAIM); spin_unlock(&ip->i_flags_lock); write_unlock(&pag->pag_ici_lock); - xfs_put_perag(ip->i_mount, pag); /* * If the inode is still dirty, then flush it out. If the inode @@ -704,10 +755,8 @@ xfs_reclaim_inode( * We get the flush lock regardless, though, just to make sure * we don't free it while it is being flushed. */ - if (!locked) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_iflock(ip); - } + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_iflock(ip); /* * In the case of a forced shutdown we rely on xfs_iflush() to @@ -724,68 +773,94 @@ xfs_reclaim_inode( return 0; } -void -__xfs_inode_set_reclaim_tag( - struct xfs_perag *pag, - struct xfs_inode *ip) +int +xfs_reclaim_inodes( + xfs_mount_t *mp, + int mode) { - radix_tree_tag_set(&pag->pag_ici_root, - XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), - XFS_ICI_RECLAIM_TAG); + return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, + XFS_ICI_RECLAIM_TAG, 1, NULL); } /* - * We set the inode flag atomically with the radix tree tag. - * Once we get tag lookups on the radix tree, this inode flag - * can go away. + * Shrinker infrastructure. + * + * This is all far more complex than it needs to be. It adds a global list of + * mounts because the shrinkers can only call a global context. We need to make + * the shrinkers pass a context to avoid the need for global state. */ -void -xfs_inode_set_reclaim_tag( - xfs_inode_t *ip) +static LIST_HEAD(xfs_mount_list); +static struct rw_semaphore xfs_mount_list_lock; + +static int +xfs_reclaim_inode_shrink( + int nr_to_scan, + gfp_t gfp_mask) { - xfs_mount_t *mp = ip->i_mount; - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + struct xfs_mount *mp; + xfs_agnumber_t ag; + int reclaimable = 0; + + if (nr_to_scan) { + if (!(gfp_mask & __GFP_FS)) + return -1; + + down_read(&xfs_mount_list_lock); + list_for_each_entry(mp, &xfs_mount_list, m_mplist) { + xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, + XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); + if (nr_to_scan <= 0) + break; + } + up_read(&xfs_mount_list_lock); + } - read_lock(&pag->pag_ici_lock); - spin_lock(&ip->i_flags_lock); - __xfs_inode_set_reclaim_tag(pag, ip); - __xfs_iflags_set(ip, XFS_IRECLAIMABLE); - spin_unlock(&ip->i_flags_lock); - read_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); + down_read(&xfs_mount_list_lock); + list_for_each_entry(mp, &xfs_mount_list, m_mplist) { + for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { + + if (!mp->m_perag[ag].pag_ici_init) + continue; + reclaimable += mp->m_perag[ag].pag_ici_reclaimable; + } + } + up_read(&xfs_mount_list_lock); + return reclaimable; } -void -__xfs_inode_clear_reclaim_tag( - xfs_mount_t *mp, - xfs_perag_t *pag, - xfs_inode_t *ip) +static struct shrinker xfs_inode_shrinker = { + .shrink = xfs_reclaim_inode_shrink, + .seeks = DEFAULT_SEEKS, +}; + +void __init +xfs_inode_shrinker_init(void) { - radix_tree_tag_clear(&pag->pag_ici_root, - XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + init_rwsem(&xfs_mount_list_lock); + register_shrinker(&xfs_inode_shrinker); } -STATIC int -xfs_reclaim_inode_now( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) +void +xfs_inode_shrinker_destroy(void) { - /* ignore if already under reclaim */ - if (xfs_iflags_test(ip, XFS_IRECLAIM)) { - read_unlock(&pag->pag_ici_lock); - return 0; - } - read_unlock(&pag->pag_ici_lock); + ASSERT(list_empty(&xfs_mount_list)); + unregister_shrinker(&xfs_inode_shrinker); +} - return xfs_reclaim_inode(ip, 0, flags); +void +xfs_inode_shrinker_register( + struct xfs_mount *mp) +{ + down_write(&xfs_mount_list_lock); + list_add_tail(&mp->m_mplist, &xfs_mount_list); + up_write(&xfs_mount_list_lock); } -int -xfs_reclaim_inodes( - xfs_mount_t *mp, - int mode) +void +xfs_inode_shrinker_unregister( + struct xfs_mount *mp) { - return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, - XFS_ICI_RECLAIM_TAG); + down_write(&xfs_mount_list_lock); + list_del(&mp->m_mplist); + up_write(&xfs_mount_list_lock); } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 27920eb7a820..0b28c13bdf94 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -44,7 +44,6 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inodes(struct xfs_inode *ip); -int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); @@ -55,6 +54,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, int tag); + int flags, int tag, int write_lock, int *nr_to_scan); + +void xfs_inode_shrinker_init(void); +void xfs_inode_shrinker_destroy(void); +void xfs_inode_shrinker_register(struct xfs_mount *mp); +void xfs_inode_shrinker_unregister(struct xfs_mount *mp); #endif |