From 75c1be487a690db43da2c1234fcacd84c982803c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 28 Jul 2010 10:18:38 -0400 Subject: fsnotify: srcu to protect read side of inode and vfsmount locks Currently reading the inode->i_fsnotify_marks or vfsmount->mnt_fsnotify_marks lists are protected by a spinlock on both the read and the write side. This patch protects the read side of those lists with a new single srcu. Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 69 ++++++++++++++++++++++++++++++++++------------------ fs/notify/fsnotify.h | 5 ++-- fs/notify/group.c | 16 ++++-------- fs/notify/mark.c | 60 ++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 110 insertions(+), 40 deletions(-) (limited to 'fs/notify') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4788c866473a..4678b416241e 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -144,14 +144,15 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is) { struct fsnotify_mark *mark; struct hlist_node *node; + int idx; + + idx = srcu_read_lock(&fsnotify_mark_srcu); if (!hlist_empty(&inode->i_fsnotify_marks)) { - spin_lock(&inode->i_lock); - hlist_for_each_entry(mark, node, &inode->i_fsnotify_marks, i.i_list) { + hlist_for_each_entry_rcu(mark, node, &inode->i_fsnotify_marks, i.i_list) { if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) mark->ignored_mask = 0; } - spin_unlock(&inode->i_lock); } if (data_is == FSNOTIFY_EVENT_FILE) { @@ -159,14 +160,14 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is) mnt = ((struct file *)data)->f_path.mnt; if (mnt && !hlist_empty(&mnt->mnt_fsnotify_marks)) { - spin_lock(&mnt->mnt_root->d_lock); - hlist_for_each_entry(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) { + hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) { if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) mark->ignored_mask = 0; } - spin_unlock(&mnt->mnt_root->d_lock); } } + + srcu_read_unlock(&fsnotify_mark_srcu, idx); } static int send_to_group(struct fsnotify_group *group, struct inode *to_tell, @@ -208,8 +209,10 @@ static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt) int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const unsigned char *file_name, u32 cookie) { + struct fsnotify_mark *mark; struct fsnotify_group *group; struct fsnotify_event *event = NULL; + struct hlist_node *node; struct vfsmount *mnt = NULL; int idx, ret = 0; /* global tests shouldn't care about events on child only the specific event */ @@ -237,35 +240,47 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, !needed_by_vfsmount(test_mask, mnt)) return 0; - /* - * SRCU!! the groups list is very very much read only and the path is - * very hot. The VAST majority of events are not going to need to do - * anything other than walk the list so it's crazy to pre-allocate. - */ - idx = srcu_read_lock(&fsnotify_grp_srcu); + idx = srcu_read_lock(&fsnotify_mark_srcu); if (test_mask & to_tell->i_fsnotify_mask) { - list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) { - if (test_mask & group->mask) { - ret = send_to_group(group, to_tell, NULL, mask, data, data_is, - cookie, file_name, &event); + hlist_for_each_entry_rcu(mark, node, &to_tell->i_fsnotify_marks, i.i_list) { + + pr_debug("%s: inode_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n", + __func__, mark, mark->mask, mark->ignored_mask); + + if (test_mask & mark->mask & ~mark->ignored_mask) { + group = mark->group; + if (!group) + continue; + ret = send_to_group(group, to_tell, NULL, mask, + data, data_is, cookie, file_name, + &event); if (ret) goto out; } } } - if (needed_by_vfsmount(test_mask, mnt)) { - list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) { - if (test_mask & group->mask) { - ret = send_to_group(group, to_tell, mnt, mask, data, data_is, - cookie, file_name, &event); + + if (mnt && (test_mask & mnt->mnt_fsnotify_mask)) { + hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) { + + pr_debug("%s: mnt_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n", + __func__, mark, mark->mask, mark->ignored_mask); + + if (test_mask & mark->mask & ~mark->ignored_mask) { + group = mark->group; + if (!group) + continue; + ret = send_to_group(group, to_tell, mnt, mask, + data, data_is, cookie, file_name, + &event); if (ret) goto out; } } } out: - srcu_read_unlock(&fsnotify_grp_srcu, idx); + srcu_read_unlock(&fsnotify_mark_srcu, idx); /* * fsnotify_create_event() took a reference so the event can't be cleaned * up while we are still trying to add it to lists, drop that one. @@ -279,8 +294,14 @@ EXPORT_SYMBOL_GPL(fsnotify); static __init int fsnotify_init(void) { + int ret; + BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23); - return init_srcu_struct(&fsnotify_grp_srcu); + ret = init_srcu_struct(&fsnotify_mark_srcu); + if (ret) + panic("initializing fsnotify_mark_srcu"); + + return 0; } -subsys_initcall(fsnotify_init); +core_initcall(fsnotify_init); diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 1be54f6f9e7d..7eed86f942ba 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -6,8 +6,6 @@ #include #include -/* protects reads of fsnotify_groups */ -extern struct srcu_struct fsnotify_grp_srcu; /* all groups which receive inode fsnotify events */ extern struct list_head fsnotify_inode_groups; /* all groups which receive vfsmount fsnotify events */ @@ -20,6 +18,9 @@ extern __u32 fsnotify_vfsmount_mask; /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); +/* protects reads of inode and vfsmount marks list */ +extern struct srcu_struct fsnotify_mark_srcu; + extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, __u32 mask); /* add a mark to an inode */ diff --git a/fs/notify/group.c b/fs/notify/group.c index 7ac65ed4735b..48d3a6d6e47a 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -30,8 +30,6 @@ /* protects writes to fsnotify_groups and fsnotify_mask */ static DEFINE_MUTEX(fsnotify_grp_mutex); -/* protects reads while running the fsnotify_groups list */ -struct srcu_struct fsnotify_grp_srcu; /* all groups registered to receive inode filesystem notifications */ LIST_HEAD(fsnotify_inode_groups); /* all groups registered to receive mount point filesystem notifications */ @@ -50,18 +48,17 @@ void fsnotify_recalc_global_mask(void) struct fsnotify_group *group; __u32 inode_mask = 0; __u32 vfsmount_mask = 0; - int idx; - idx = srcu_read_lock(&fsnotify_grp_srcu); + mutex_lock(&fsnotify_grp_mutex); list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) inode_mask |= group->mask; list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) vfsmount_mask |= group->mask; - - srcu_read_unlock(&fsnotify_grp_srcu, idx); fsnotify_inode_mask = inode_mask; fsnotify_vfsmount_mask = vfsmount_mask; + + mutex_unlock(&fsnotify_grp_mutex); } /* @@ -168,6 +165,8 @@ static void fsnotify_destroy_group(struct fsnotify_group *group) /* clear all inode marks for this group */ fsnotify_clear_marks_by_group(group); + synchronize_srcu(&fsnotify_mark_srcu); + /* past the point of no return, matches the initial value of 1 */ if (atomic_dec_and_test(&group->num_marks)) fsnotify_final_destroy_group(group); @@ -216,12 +215,7 @@ void fsnotify_put_group(struct fsnotify_group *group) */ __fsnotify_evict_group(group); - /* - * now it's off the list, so the only thing we might care about is - * srcu access.... - */ mutex_unlock(&fsnotify_grp_mutex); - synchronize_srcu(&fsnotify_grp_srcu); /* and now it is really dead. _Nothing_ could be seeing it */ fsnotify_recalc_global_mask(); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 69c5a166930c..41f3990f900b 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -85,10 +85,12 @@ #include #include #include +#include #include #include #include #include +#include #include /* for inode_lock */ #include @@ -96,6 +98,11 @@ #include #include "fsnotify.h" +struct srcu_struct fsnotify_mark_srcu; +static DEFINE_SPINLOCK(destroy_lock); +static LIST_HEAD(destroy_list); +static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq); + void fsnotify_get_mark(struct fsnotify_mark *mark) { atomic_inc(&mark->refcnt); @@ -144,11 +151,14 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) list_del_init(&mark->g_list); - fsnotify_put_mark(mark); /* for i_list and g_list */ - spin_unlock(&group->mark_lock); spin_unlock(&mark->lock); + spin_lock(&destroy_lock); + list_add(&mark->destroy_list, &destroy_list); + spin_unlock(&destroy_lock); + wake_up(&destroy_waitq); + /* * Some groups like to know that marks are being freed. This is a * callback to the group function to let it know that this mark @@ -263,12 +273,17 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, err: mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; list_del_init(&mark->g_list); + mark->group = NULL; atomic_dec(&group->num_marks); - fsnotify_put_mark(mark); spin_unlock(&group->mark_lock); spin_unlock(&mark->lock); + spin_lock(&destroy_lock); + list_add(&mark->destroy_list, &destroy_list); + spin_unlock(&destroy_lock); + wake_up(&destroy_waitq); + return ret; } @@ -326,3 +341,42 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, atomic_set(&mark->refcnt, 1); mark->free_mark = free_mark; } + +static int fsnotify_mark_destroy(void *ignored) +{ + struct fsnotify_mark *mark, *next; + LIST_HEAD(private_destroy_list); + + for (;;) { + spin_lock(&destroy_lock); + list_for_each_entry_safe(mark, next, &destroy_list, destroy_list) { + list_del(&mark->destroy_list); + list_add(&mark->destroy_list, &private_destroy_list); + } + spin_unlock(&destroy_lock); + + synchronize_srcu(&fsnotify_mark_srcu); + + list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) { + list_del_init(&mark->destroy_list); + fsnotify_put_mark(mark); + } + + wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list)); + } + + return 0; +} + +static int __init fsnotify_mark_init(void) +{ + struct task_struct *thread; + + thread = kthread_run(fsnotify_mark_destroy, NULL, + "fsnotify_mark"); + if (IS_ERR(thread)) + panic("unable to start fsnotify mark destruction thread."); + + return 0; +} +device_initcall(fsnotify_mark_init); -- cgit v1.2.3