From 2163b1e616c41c286f5ab79912671cd4bf52057c Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 25 Jun 2009 16:30:26 +0100 Subject: GFS2: Shrink the shrinker This patch removes some of the special cases that the shrinker was trying to deal with. As a result we leave fewer items on the list and none at all which cannot be demoted. This makes the list scanning more efficient and solves some issues seen with large numbers of inodes. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 297421c0427a..fdb796c4f940 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1300,7 +1300,6 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) struct gfs2_glock *gl; int may_demote; int nr_skipped = 0; - int got_ref = 0; LIST_HEAD(skipped); if (nr == 0) @@ -1318,7 +1317,6 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) /* Test for being demotable */ if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { gfs2_glock_hold(gl); - got_ref = 1; spin_unlock(&lru_lock); spin_lock(&gl->gl_spin); may_demote = demote_ok(gl); @@ -1327,25 +1325,14 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) if (may_demote) { handle_callback(gl, LM_ST_UNLOCKED, 0); nr--; - if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gfs2_glock_put(gl); - got_ref = 0; } + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); spin_lock(&lru_lock); - if (may_demote) - continue; - } - if (list_empty(&gl->gl_lru) && - (atomic_read(&gl->gl_ref) <= (2 + got_ref))) { - nr_skipped++; - list_add(&gl->gl_lru, &skipped); - } - if (got_ref) { - spin_unlock(&lru_lock); - gfs2_glock_put(gl); - spin_lock(&lru_lock); - got_ref = 0; + continue; } + nr_skipped++; + list_add(&gl->gl_lru, &skipped); } list_splice(&skipped, &lru_list); atomic_add(nr_skipped, &lru_count); -- cgit v1.2.3 From 1946f70ab5e4eb8b54a8eaaedba2293a3750ab7e Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Thu, 25 Jun 2009 15:09:51 -0500 Subject: GFS2: keep statfs info in sync on grows GFS2 wasn't syncing its statfs info on grows. This causes a problem when you grow the filesystem on multiple nodes. GFS2 would calculate the new space based on the resource groups (which are always current), and then assume that the filesystem had grown the from the existing statfs size. If you grew the filesystem on two different nodes in a short time, the second node wouldn't see the statfs size change from the first node, and would assume that it was grown by a larger amount than it was. When all these changes were synced out, the total fileystem size would be incorrect (the first grow would be counted twice). This patch syncs makes GFS2 read in the statfs changes from disk before a grow, and write them out after the grow, while the master statfs inode is locked. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/gfs2/super.c | 39 +++++++++++++++++++++++++-------------- fs/gfs2/super.h | 4 ++++ 3 files changed, 68 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 03ebb439ace0..7ebae9a4ecc0 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -624,6 +624,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, { struct gfs2_inode *ip = GFS2_I(mapping->host); struct gfs2_sbd *sdp = GFS2_SB(mapping->host); + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); unsigned int data_blocks = 0, ind_blocks = 0, rblocks; int alloc_required; int error = 0; @@ -637,6 +638,14 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, error = gfs2_glock_nq(&ip->i_gh); if (unlikely(error)) goto out_uninit; + if (&ip->i_inode == sdp->sd_rindex) { + error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, + GL_NOCACHE, &m_ip->i_gh); + if (unlikely(error)) { + gfs2_glock_dq(&ip->i_gh); + goto out_uninit; + } + } error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); if (error) @@ -667,6 +676,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, rblocks += data_blocks ? data_blocks : 1; if (ind_blocks || data_blocks) rblocks += RES_STATFS + RES_QUOTA; + if (&ip->i_inode == sdp->sd_rindex) + rblocks += 2 * RES_STATFS; error = gfs2_trans_begin(sdp, rblocks, PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); @@ -712,6 +723,10 @@ out_alloc_put: gfs2_alloc_put(ip); } out_unlock: + if (&ip->i_inode == sdp->sd_rindex) { + gfs2_glock_dq(&m_ip->i_gh); + gfs2_holder_uninit(&m_ip->i_gh); + } gfs2_glock_dq(&ip->i_gh); out_uninit: gfs2_holder_uninit(&ip->i_gh); @@ -725,14 +740,21 @@ out_uninit: static void adjust_fs_space(struct inode *inode) { struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); + struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; + struct buffer_head *m_bh, *l_bh; u64 fs_total, new_free; /* Total up the file system space, according to the latest rindex. */ fs_total = gfs2_ri_total(sdp); + if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0) + return; spin_lock(&sdp->sd_statfs_spin); + gfs2_statfs_change_in(m_sc, m_bh->b_data + + sizeof(struct gfs2_dinode)); if (fs_total > (m_sc->sc_total + l_sc->sc_total)) new_free = fs_total - (m_sc->sc_total + l_sc->sc_total); else @@ -741,6 +763,13 @@ static void adjust_fs_space(struct inode *inode) fs_warn(sdp, "File system extended by %llu blocks.\n", (unsigned long long)new_free); gfs2_statfs_change(sdp, new_free, new_free, 0); + + if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0) + goto out; + update_statfs(sdp, m_bh, l_bh); + brelse(l_bh); +out: + brelse(m_bh); } /** @@ -763,6 +792,7 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); u64 to = pos + copied; void *kaddr; unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); @@ -794,6 +824,10 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, brelse(dibh); gfs2_trans_end(sdp); + if (inode == sdp->sd_rindex) { + gfs2_glock_dq(&m_ip->i_gh); + gfs2_holder_uninit(&m_ip->i_gh); + } gfs2_glock_dq(&ip->i_gh); gfs2_holder_uninit(&ip->i_gh); return copied; @@ -823,6 +857,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, struct inode *inode = page->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct buffer_head *dibh; struct gfs2_alloc *al = ip->i_alloc; unsigned int from = pos & (PAGE_CACHE_SIZE - 1); @@ -865,6 +900,10 @@ failed: gfs2_quota_unlock(ip); gfs2_alloc_put(ip); } + if (inode == sdp->sd_rindex) { + gfs2_glock_dq(&m_ip->i_gh); + gfs2_holder_uninit(&m_ip->i_gh); + } gfs2_glock_dq(&ip->i_gh); gfs2_holder_uninit(&ip->i_gh); return ret; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 0a6801336470..552e321cee5e 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -353,7 +353,7 @@ fail: return error; } -static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) +void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) { const struct gfs2_statfs_change *str = buf; @@ -441,6 +441,29 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, brelse(l_bh); } +void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, + struct buffer_head *l_bh) +{ + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); + struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; + + gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); + + spin_lock(&sdp->sd_statfs_spin); + m_sc->sc_total += l_sc->sc_total; + m_sc->sc_free += l_sc->sc_free; + m_sc->sc_dinodes += l_sc->sc_dinodes; + memset(l_sc, 0, sizeof(struct gfs2_statfs_change)); + memset(l_bh->b_data + sizeof(struct gfs2_dinode), + 0, sizeof(struct gfs2_statfs_change)); + spin_unlock(&sdp->sd_statfs_spin); + + gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); + gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); +} + int gfs2_statfs_sync(struct gfs2_sbd *sdp) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); @@ -477,19 +500,7 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp) if (error) goto out_bh2; - gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); - - spin_lock(&sdp->sd_statfs_spin); - m_sc->sc_total += l_sc->sc_total; - m_sc->sc_free += l_sc->sc_free; - m_sc->sc_dinodes += l_sc->sc_dinodes; - memset(l_sc, 0, sizeof(struct gfs2_statfs_change)); - memset(l_bh->b_data + sizeof(struct gfs2_dinode), - 0, sizeof(struct gfs2_statfs_change)); - spin_unlock(&sdp->sd_statfs_spin); - - gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); - gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); + update_statfs(sdp, m_bh, l_bh); gfs2_trans_end(sdp); diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index b56413e3e40d..22e0417ed996 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -40,6 +40,10 @@ extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp); extern int gfs2_statfs_init(struct gfs2_sbd *sdp); extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, s64 dinodes); +extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, + const void *buf); +extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, + struct buffer_head *l_bh); extern int gfs2_statfs_sync(struct gfs2_sbd *sdp); extern int gfs2_freeze_fs(struct gfs2_sbd *sdp); -- cgit v1.2.3 From a51b56fff3f04fc5aa66b21a2a6d693ee9862d66 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 30 Jun 2009 13:51:11 -0500 Subject: GFS2: Fix panic in glock memory shrinker It is possible for gfs2_shrink_glock_memory() to check a glock for demotion that's in the process of being freed by gfs2_glock_put(). In this case, gfs2_shrink_glock_memory() will acquire a new reference to this glock, and then try to free the glock itself when it drops the refernce. To solve this, gfs2_shrink_glock_memory() just needs to check if the glock is in the process of being freed, and if so skip it without ever unlocking the lru_lock. Signed-off-by: Benjamin Marzinski Acked-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index fdb796c4f940..827136ee794c 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1314,6 +1314,10 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) list_del_init(&gl->gl_lru); atomic_dec(&lru_count); + /* Check if glock is about to be freed */ + if (atomic_read(&gl->gl_ref) == 0) + continue; + /* Test for being demotable */ if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { gfs2_glock_hold(gl); -- cgit v1.2.3 From 1e19a19584b332eb92a573b66b7342fb97e67507 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 10 Jul 2009 21:13:38 +0100 Subject: GFS2: Don't try and dealloc own inode When searching for unlinked, but still allocated inodes during block allocation, avoid the block relating to the inode that is doing the allocation. This fixes a hang caused when an unlinked, but still open, inode tries to allocate some more blocks and lands up finding itself during the search for deallocatable inodes. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index daa4ae341a29..5e5074176daa 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -961,7 +961,8 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) * Returns: The inode, if one has been found */ -static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) +static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, + u64 skip) { struct inode *inode; u32 goal = 0, block; @@ -985,6 +986,8 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) goal++; if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) continue; + if (no_addr == skip) + continue; *last_unlinked = no_addr; inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, no_addr, -1, 1); @@ -1104,7 +1107,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) if (try_rgrp_fit(rgd, al)) goto out; if (rgd->rd_flags & GFS2_RDF_CHECK) - inode = try_rgrp_unlink(rgd, last_unlinked); + inode = try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); if (!rg_locked) gfs2_glock_dq_uninit(&al->al_rgd_gh); if (inode) @@ -1138,7 +1141,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) if (try_rgrp_fit(rgd, al)) goto out; if (rgd->rd_flags & GFS2_RDF_CHECK) - inode = try_rgrp_unlink(rgd, last_unlinked); + inode = try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); if (!rg_locked) gfs2_glock_dq_uninit(&al->al_rgd_gh); if (inode) -- cgit v1.2.3 From 8ff22a6f9bdaac87c0eeb1d56c736181f11b4221 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Fri, 10 Jul 2009 18:04:24 -0500 Subject: GFS2: Don't put unlikely reclaim candidates on the reclaim list. GFS2 was placing far too many glocks on the reclaim list that were not good candidates for freeing up from cache. These locks would sit there and repeatedly get scanned to see if they could be reclaimed, wasting a lot of time when there was memory pressure. This fix does more checks on the locks to see if they are actually likely to be removable from cache. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 72 ++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 827136ee794c..f041a89e1ab8 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -173,6 +173,26 @@ static void gfs2_glock_hold(struct gfs2_glock *gl) atomic_inc(&gl->gl_ref); } +/** + * demote_ok - Check to see if it's ok to unlock a glock + * @gl: the glock + * + * Returns: 1 if it's ok + */ + +static int demote_ok(const struct gfs2_glock *gl) +{ + const struct gfs2_glock_operations *glops = gl->gl_ops; + + if (gl->gl_state == LM_ST_UNLOCKED) + return 0; + if (!list_empty(&gl->gl_holders)) + return 0; + if (glops->go_demote_ok) + return glops->go_demote_ok(gl); + return 1; +} + /** * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list * @gl: the glock @@ -181,14 +201,34 @@ static void gfs2_glock_hold(struct gfs2_glock *gl) static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) { + int may_reclaim; + may_reclaim = (demote_ok(gl) && + (atomic_read(&gl->gl_ref) == 1 || + (gl->gl_name.ln_type == LM_TYPE_INODE && + atomic_read(&gl->gl_ref) <= 2))); spin_lock(&lru_lock); - if (list_empty(&gl->gl_lru) && gl->gl_state != LM_ST_UNLOCKED) { + if (list_empty(&gl->gl_lru) && may_reclaim) { list_add_tail(&gl->gl_lru, &lru_list); atomic_inc(&lru_count); } spin_unlock(&lru_lock); } +/** + * gfs2_glock_put_nolock() - Decrement reference count on glock + * @gl: The glock to put + * + * This function should only be used if the caller has its own reference + * to the glock, in addition to the one it is dropping. + */ + +static void gfs2_glock_put_nolock(struct gfs2_glock *gl) +{ + if (atomic_dec_and_test(&gl->gl_ref)) + GLOCK_BUG_ON(gl, 1); + gfs2_glock_schedule_for_reclaim(gl); +} + /** * gfs2_glock_put() - Decrement reference count on glock * @gl: The glock to put @@ -214,9 +254,9 @@ int gfs2_glock_put(struct gfs2_glock *gl) rv = 1; goto out; } - /* 1 for being hashed, 1 for having state != LM_ST_UNLOCKED */ - if (atomic_read(&gl->gl_ref) == 2) - gfs2_glock_schedule_for_reclaim(gl); + spin_lock(&gl->gl_spin); + gfs2_glock_schedule_for_reclaim(gl); + spin_unlock(&gl->gl_spin); write_unlock(gl_lock_addr(gl->gl_hash)); out: return rv; @@ -398,7 +438,7 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) if (held2) gfs2_glock_hold(gl); else - gfs2_glock_put(gl); + gfs2_glock_put_nolock(gl); } gl->gl_state = new_state; @@ -633,7 +673,7 @@ out: out_sched: gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gfs2_glock_put(gl); + gfs2_glock_put_nolock(gl); out_unlock: clear_bit(GLF_LOCK, &gl->gl_flags); goto out; @@ -1274,26 +1314,6 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) gfs2_glock_put(gl); } -/** - * demote_ok - Check to see if it's ok to unlock a glock - * @gl: the glock - * - * Returns: 1 if it's ok - */ - -static int demote_ok(const struct gfs2_glock *gl) -{ - const struct gfs2_glock_operations *glops = gl->gl_ops; - - if (gl->gl_state == LM_ST_UNLOCKED) - return 0; - if (!list_empty(&gl->gl_holders)) - return 0; - if (glops->go_demote_ok) - return glops->go_demote_ok(gl); - return 1; -} - static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) { -- cgit v1.2.3 From 6b94617024bd6810cde1d0d491202c30d5a38d91 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Fri, 10 Jul 2009 18:13:26 -0500 Subject: GFS2: Fix incorrent statfs consistency check Since both linked and unlinked inodes are counted by rgd->rd_dinodes, It makes no sense to count them with the used data blocks (first check that I changed), it makes sense to count them with the linked inodes (second check), and it makes no sense to care if there are more unlinked inodes than linked ones. This fixes these errors. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 5e5074176daa..fba795798d3a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -285,27 +285,19 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) } tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; - if (count[1] + count[2] != tmp) { + if (count[1] != tmp) { if (gfs2_consist_rgrpd(rgd)) fs_err(sdp, "used data mismatch: %u != %u\n", count[1], tmp); return; } - if (count[3] != rgd->rd_dinodes) { + if (count[2] + count[3] != rgd->rd_dinodes) { if (gfs2_consist_rgrpd(rgd)) fs_err(sdp, "used metadata mismatch: %u != %u\n", - count[3], rgd->rd_dinodes); + count[2] + count[3], rgd->rd_dinodes); return; } - - if (count[2] > count[3]) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "unlinked inodes > inodes: %u\n", - count[2]); - return; - } - } static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) -- cgit v1.2.3 From b94a170e96dc416828af9d350ae2e34b70ae7347 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Thu, 23 Jul 2009 18:52:34 -0500 Subject: GFS2: remove dcache entries for remote deleted inodes When a file is deleted from a gfs2 filesystem on one node, a dcache entry for it may still exist on other nodes in the cluster. If this happens, gfs2 will be unable to free this file on disk. Because of this, it's possible to have a gfs2 filesystem with no files on it and no free space. With this patch, when a node receives a callback notifying it that the file is being deleted on another node, it schedules a new workqueue thread to remove the file's dcache entry. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 43 ++++++++++++++++++++++++++++++++++++++----- fs/gfs2/glock.h | 3 +++ fs/gfs2/glops.c | 21 +++++++++++++++++++++ fs/gfs2/incore.h | 2 ++ fs/gfs2/super.c | 1 + 5 files changed, 65 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f041a89e1ab8..8b674b1f3a55 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -63,6 +63,7 @@ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int static DECLARE_RWSEM(gfs2_umount_flush_sem); static struct dentry *gfs2_root; static struct workqueue_struct *glock_workqueue; +struct workqueue_struct *gfs2_delete_workqueue; static LIST_HEAD(lru_list); static atomic_t lru_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(lru_lock); @@ -167,7 +168,7 @@ static void glock_free(struct gfs2_glock *gl) * */ -static void gfs2_glock_hold(struct gfs2_glock *gl) +void gfs2_glock_hold(struct gfs2_glock *gl) { GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0); atomic_inc(&gl->gl_ref); @@ -222,7 +223,7 @@ static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) * to the glock, in addition to the one it is dropping. */ -static void gfs2_glock_put_nolock(struct gfs2_glock *gl) +void gfs2_glock_put_nolock(struct gfs2_glock *gl) { if (atomic_dec_and_test(&gl->gl_ref)) GLOCK_BUG_ON(gl, 1); @@ -679,6 +680,29 @@ out_unlock: goto out; } +static void delete_work_func(struct work_struct *work) +{ + struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); + struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_inode *ip = NULL; + struct inode *inode; + u64 no_addr = 0; + + spin_lock(&gl->gl_spin); + ip = (struct gfs2_inode *)gl->gl_object; + if (ip) + no_addr = ip->i_no_addr; + spin_unlock(&gl->gl_spin); + if (ip) { + inode = gfs2_ilookup(sdp->sd_vfs, no_addr); + if (inode) { + d_prune_aliases(inode); + iput(inode); + } + } + gfs2_glock_put(gl); +} + static void glock_work_func(struct work_struct *work) { unsigned long delay = 0; @@ -757,6 +781,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_sbd = sdp; gl->gl_aspace = NULL; INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); + INIT_WORK(&gl->gl_delete, delete_work_func); /* If this glock protects actual on-disk data or metadata blocks, create a VFS inode to manage the pages/buffers holding them. */ @@ -898,6 +923,8 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, gl->gl_demote_state != state) { gl->gl_demote_state = LM_ST_UNLOCKED; } + if (gl->gl_ops->go_callback) + gl->gl_ops->go_callback(gl); trace_gfs2_demote_rq(gl); } @@ -1344,14 +1371,14 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) spin_unlock(&lru_lock); spin_lock(&gl->gl_spin); may_demote = demote_ok(gl); - spin_unlock(&gl->gl_spin); - clear_bit(GLF_LOCK, &gl->gl_flags); if (may_demote) { handle_callback(gl, LM_ST_UNLOCKED, 0); nr--; } if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gfs2_glock_put(gl); + gfs2_glock_put_nolock(gl); + spin_unlock(&gl->gl_spin); + clear_bit(GLF_LOCK, &gl->gl_flags); spin_lock(&lru_lock); continue; } @@ -1738,6 +1765,11 @@ int __init gfs2_glock_init(void) glock_workqueue = create_workqueue("glock_workqueue"); if (IS_ERR(glock_workqueue)) return PTR_ERR(glock_workqueue); + gfs2_delete_workqueue = create_workqueue("delete_workqueue"); + if (IS_ERR(gfs2_delete_workqueue)) { + destroy_workqueue(glock_workqueue); + return PTR_ERR(gfs2_delete_workqueue); + } register_shrinker(&glock_shrinker); @@ -1748,6 +1780,7 @@ void gfs2_glock_exit(void) { unregister_shrinker(&glock_shrinker); destroy_workqueue(glock_workqueue); + destroy_workqueue(gfs2_delete_workqueue); } static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index a602a28f6f08..c609894ec0d0 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -143,6 +143,7 @@ struct lm_lockops { #define GLR_TRYFAILED 13 +extern struct workqueue_struct *gfs2_delete_workqueue; static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) { struct gfs2_holder *gh; @@ -191,6 +192,8 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); +void gfs2_glock_hold(struct gfs2_glock *gl); +void gfs2_glock_put_nolock(struct gfs2_glock *gl); int gfs2_glock_put(struct gfs2_glock *gl); void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, struct gfs2_holder *gh); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index d5e4ab155ca0..6985eef06c39 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -323,6 +323,7 @@ static void trans_go_sync(struct gfs2_glock *gl) if (gl->gl_state != LM_ST_UNLOCKED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + flush_workqueue(gfs2_delete_workqueue); gfs2_meta_syncfs(sdp); gfs2_log_shutdown(sdp); } @@ -372,6 +373,25 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl) return 0; } +/** + * iopen_go_callback - schedule the dcache entry for the inode to be deleted + * @gl: the glock + * + * gl_spin lock is held while calling this + */ +static void iopen_go_callback(struct gfs2_glock *gl) +{ + struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; + + if (gl->gl_demote_state == LM_ST_UNLOCKED && + gl->gl_state == LM_ST_SHARED && + ip && test_bit(GIF_USER, &ip->i_flags)) { + gfs2_glock_hold(gl); + if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) + gfs2_glock_put_nolock(gl); + } +} + const struct gfs2_glock_operations gfs2_meta_glops = { .go_type = LM_TYPE_META, }; @@ -406,6 +426,7 @@ const struct gfs2_glock_operations gfs2_trans_glops = { const struct gfs2_glock_operations gfs2_iopen_glops = { .go_type = LM_TYPE_IOPEN, + .go_callback = iopen_go_callback, }; const struct gfs2_glock_operations gfs2_flock_glops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 225347fbff3c..61801ada36f0 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -159,6 +159,7 @@ struct gfs2_glock_operations { int (*go_lock) (struct gfs2_holder *gh); void (*go_unlock) (struct gfs2_holder *gh); int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); + void (*go_callback) (struct gfs2_glock *gl); const int go_type; const unsigned long go_min_hold_time; }; @@ -228,6 +229,7 @@ struct gfs2_glock { struct list_head gl_ail_list; atomic_t gl_ail_count; struct delayed_work gl_work; + struct work_struct gl_delete; }; #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 552e321cee5e..f522bb017973 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -691,6 +691,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) struct gfs2_holder t_gh; int error; + flush_workqueue(gfs2_delete_workqueue); gfs2_quota_sync(sdp); gfs2_statfs_sync(sdp); -- cgit v1.2.3