author    Colin Cross <ccross@android.com>      2012-04-16 12:31:13 +0530
committer Varun Wadekar <vwadekar@nvidia.com>   2012-04-16 15:15:49 +0530
commit    faf8f118ff921d25279e894375068574b180c8c5 (patch)
tree      6116990272d7061e51c81a99708a4e43291feb1a
parent    07f5cc84ffabddb5f4ba2c06f01b05c41880e647 (diff)
cgroup: Remove call to synchronize_rcu in cgroup_attach_task
synchronize_rcu can be very expensive, averaging 100 ms in some cases.
In cgroup_attach_task, it is used to prevent a task->cgroups pointer
dereferenced in an RCU read-side critical section from being
invalidated, by delaying the call to put_css_set until after an RCU
grace period.

To avoid the call to synchronize_rcu, make the put_css_set call
RCU-safe instead: move the deletion of the css_set links into
free_css_set_work, which is scheduled by the RCU callback
free_css_set_rcu.

The decrement of the cgroup refcount is no longer synchronous with
the call to put_css_set, which can leave the cgroup refcount positive
after the last call to cgroup_attach_task returns. To still allow the
cgroup to be deleted with cgroup_rmdir synchronously after
cgroup_attach_task, have rmdir check the refcount of all associated
css_sets. If cgroup_rmdir is called on a cgroup whose css_sets all
have refcount zero but whose own refcount is nonzero, reuse the rmdir
waitqueue to block the rmdir until free_css_set_work has run.

Signed-off-by: Colin Cross <ccross@android.com>

Conflicts:

	kernel/cgroup.c

Change-Id: I3b3f245c8f5e2e5d33f1e54178b2bb6ef10a0817

Conflicts:

	kernel/cgroup.c

Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>
-rw-r--r--  kernel/cgroup.c  70
1 file changed, 54 insertions, 16 deletions
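The patch replaces a blocking synchronize_rcu() with a two-stage
deferred free: the final put queues an RCU callback via call_rcu(),
and that callback, which runs in softirq context after a grace
period, hands the actual teardown off to a workqueue, where heavier
work runs in process context. Below is a minimal, self-contained
sketch of that pattern, not the patch itself; the demo_* names and
the simplified teardown are illustrative assumptions.

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/list.h>

/* Illustrative stand-in for css_set; not the real structure. */
struct demo_set {
	atomic_t refcount;
	struct hlist_node node;		/* hash linkage, like css_set->hlist */
	struct rcu_head rcu_head;
	struct work_struct work;
};

/* Stage 2: runs from a workqueue in process context, where the
 * heavier teardown is safe. */
static void demo_free_work(struct work_struct *work)
{
	struct demo_set *s = container_of(work, struct demo_set, work);

	/* The real free_css_set_work walks and frees the
	 * cg_cgroup_link list under css_set_lock here. */
	kfree(s);
}

/* Stage 1: RCU callback, invoked in softirq context once no RCU
 * reader can still hold a reference; do nothing but hand off. */
static void demo_free_rcu(struct rcu_head *head)
{
	struct demo_set *s = container_of(head, struct demo_set, rcu_head);

	INIT_WORK(&s->work, demo_free_work);
	schedule_work(&s->work);
}

The workqueue hop keeps the RCU callback itself trivial; in the patch,
free_css_set_work does the link-list walk, cgroup refcount drops, and
rmdir-waiter wakeups before freeing the css_set.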
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 78635fffec16..6c0e1c129534 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -373,6 +373,37 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
 	return &css_set_table[index];
 }
 
+static void free_css_set_work(struct work_struct *work)
+{
+	struct css_set *cg = container_of(work, struct css_set, work);
+	struct cg_cgroup_link *link;
+	struct cg_cgroup_link *saved_link;
+
+	write_lock(&css_set_lock);
+	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+				 cg_link_list) {
+		struct cgroup *cgrp = link->cgrp;
+		list_del(&link->cg_link_list);
+		list_del(&link->cgrp_link_list);
+		if (atomic_dec_and_test(&cgrp->count)) {
+			check_for_release(cgrp);
+			cgroup_wakeup_rmdir_waiter(cgrp);
+		}
+		kfree(link);
+	}
+	write_unlock(&css_set_lock);
+
+	kfree(cg);
+}
+
+static void free_css_set_rcu(struct rcu_head *obj)
+{
+	struct css_set *cg = container_of(obj, struct css_set, rcu_head);
+
+	INIT_WORK(&cg->work, free_css_set_work);
+	schedule_work(&cg->work);
+}
+
 /* We don't maintain the lists running through each css_set to its
  * task until after the first call to cgroup_iter_start(). This
  * reduces the fork()/exit() overhead for people who have cgroups
@@ -389,23 +420,24 @@ static inline void get_css_set(struct css_set *cg)
 
 static void put_css_set(struct css_set *cg)
 {
-	struct css_set *cg = container_of(work, struct css_set, work);
-	struct cg_cgroup_link *link;
-	struct cg_cgroup_link *saved_link;
-
+	/*
+	 * Ensure that the refcount doesn't hit zero while any readers
+	 * can see it. Similar to atomic_dec_and_lock(), but for an
+	 * rwlock
+	 */
+	if (atomic_add_unless(&cg->refcount, -1, 1))
+		return;
 	write_lock(&css_set_lock);
-	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
-				 cg_link_list) {
-		struct cgroup *cgrp = link->cgrp;
-		list_del(&link->cg_link_list);
-		list_del(&link->cgrp_link_list);
-		if (atomic_dec_and_test(&cgrp->count))
-			check_for_release(cgrp);
-		kfree(link);
+	if (!atomic_dec_and_test(&cg->refcount)) {
+		write_unlock(&css_set_lock);
+		return;
 	}
-	write_unlock(&css_set_lock);
-	kfree(cg);
+	hlist_del(&cg->hlist);
+	css_set_count--;
+
+	write_unlock(&css_set_lock);
+	call_rcu(&cg->rcu_head, free_css_set_rcu);
 }
 
 static void free_css_set_rcu(struct rcu_head *obj)
@@ -1874,6 +1906,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	struct cgroupfs_root *root = cgrp->root;
 	struct cgroup_taskset tset = { };
 	struct css_set *newcg;
+	struct css_set *cg;
 
 	/* @tsk either already exited or can't exit until the end */
 	if (tsk->flags & PF_EXITING)
@@ -1909,15 +1942,20 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 		goto out;
 	}
 
+	task_lock(tsk);
+	cg = tsk->cgroups;
+	get_css_set(cg);
+	task_unlock(tsk);
+
 	cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg);
 
 	for_each_subsys(root, ss) {
 		if (ss->attach)
			ss->attach(cgrp, &tset);
 	}
-
 	set_bit(CGRP_RELEASABLE, &cgrp->flags);
-	synchronize_rcu();
+	/* put_css_set will not destroy cg until after an RCU grace period */
+	put_css_set(cg);
 
 	/*
 	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
 	 * is no longer empty.
 	 */
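For completeness: the refcounting that the patch adds to put_css_set
follows the atomic_dec_and_lock() idiom adapted to an rwlock, as its
comment notes. Drop the reference on the fast path only when it
cannot be the last one, and perform the final decrement under the
writer lock, so a reader searching the hash table under the reader
lock can never find a css_set whose refcount has already reached
zero. A hedged sketch, continuing the demo_* names from the earlier
block (demo_lock is an assumed stand-in for css_set_lock):

#include <linux/spinlock.h>

static DEFINE_RWLOCK(demo_lock);	/* stands in for css_set_lock */

static void demo_put(struct demo_set *s)
{
	/* Fast path: drop a reference unless it is the last one. */
	if (atomic_add_unless(&s->refcount, -1, 1))
		return;

	/*
	 * Slow path: take the writer lock before the final decrement,
	 * so lookups under the reader lock never observe an object
	 * whose refcount has already hit zero.
	 */
	write_lock(&demo_lock);
	if (!atomic_dec_and_test(&s->refcount)) {
		/* Lost a race: someone re-acquired a reference. */
		write_unlock(&demo_lock);
		return;
	}
	hlist_del(&s->node);	/* unlink while still write-locked */
	write_unlock(&demo_lock);
	call_rcu(&s->rcu_head, demo_free_rcu);	/* sketch above */
}

Only the thread whose decrement actually takes the count to zero
unlinks the object and queues the RCU free; any racing put simply
backs out after releasing the lock.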