diff options
author | Colin Cross <ccross@android.com> | 2012-04-16 12:31:13 +0530 |
---|---|---|
committer | Varun Wadekar <vwadekar@nvidia.com> | 2012-04-16 15:15:49 +0530 |
commit | faf8f118ff921d25279e894375068574b180c8c5 (patch) | |
tree | 6116990272d7061e51c81a99708a4e43291feb1a | |
parent | 07f5cc84ffabddb5f4ba2c06f01b05c41880e647 (diff) |
cgroup: Remove call to synchronize_rcu in cgroup_attach_task
synchronize_rcu can be very expensive, averaging 100 ms in
some cases. In cgroup_attach_task, it is used to prevent
a task->cgroups pointer dereferenced in an RCU read side
critical section from being invalidated, by delaying the
call to put_css_set until after an RCU grace period.
To avoid the call to synchronize_rcu, make the put_css_set
call rcu-safe by moving the deletion of the css_set links
into free_css_set_work, scheduled by the rcu callback
free_css_set_rcu.
The decrement of the cgroup refcount is no longer
synchronous with the call to put_css_set, which can result
in the cgroup refcount staying positive after the last call
to cgroup_attach_task returns. To allow the cgroup to be
deleted with cgroup_rmdir synchronously after
cgroup_attach_task, have rmdir check the refcount of all
associated css_sets. If cgroup_rmdir is called on a cgroup
for which the css_sets all have refcount zero but the
cgroup refcount is nonzero, reuse the rmdir waitqueue to
block the rmdir until free_css_set_work is called.
Signed-off-by: Colin Cross <ccross@android.com>
Conflicts:
kernel/cgroup.c
Change-Id: I3b3f245c8f5e2e5d33f1e54178b2bb6ef10a0817
Conflicts:
kernel/cgroup.c
Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>
-rw-r--r-- | kernel/cgroup.c | 70 |
1 file changed, 54 insertions(+), 16 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 78635fffec16..6c0e1c129534 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -373,6 +373,37 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) return &css_set_table[index]; } +static void free_css_set_work(struct work_struct *work) +{ + struct css_set *cg = container_of(work, struct css_set, work); + struct cg_cgroup_link *link; + struct cg_cgroup_link *saved_link; + + write_lock(&css_set_lock); + list_for_each_entry_safe(link, saved_link, &cg->cg_links, + cg_link_list) { + struct cgroup *cgrp = link->cgrp; + list_del(&link->cg_link_list); + list_del(&link->cgrp_link_list); + if (atomic_dec_and_test(&cgrp->count)) { + check_for_release(cgrp); + cgroup_wakeup_rmdir_waiter(cgrp); + } + kfree(link); + } + write_unlock(&css_set_lock); + + kfree(cg); +} + +static void free_css_set_rcu(struct rcu_head *obj) +{ + struct css_set *cg = container_of(obj, struct css_set, rcu_head); + + INIT_WORK(&cg->work, free_css_set_work); + schedule_work(&cg->work); +} + /* We don't maintain the lists running through each css_set to its * task until after the first call to cgroup_iter_start(). This * reduces the fork()/exit() overhead for people who have cgroups @@ -389,23 +420,24 @@ static inline void get_css_set(struct css_set *cg) static void put_css_set(struct css_set *cg) { - struct css_set *cg = container_of(work, struct css_set, work); - struct cg_cgroup_link *link; - struct cg_cgroup_link *saved_link; - + /* + * Ensure that the refcount doesn't hit zero while any readers + * can see it. 
Similar to atomic_dec_and_lock(), but for an + * rwlock + */ + if (atomic_add_unless(&cg->refcount, -1, 1)) + return; write_lock(&css_set_lock); - list_for_each_entry_safe(link, saved_link, &cg->cg_links, - cg_link_list) { - struct cgroup *cgrp = link->cgrp; - list_del(&link->cg_link_list); - list_del(&link->cgrp_link_list); - if (atomic_dec_and_test(&cgrp->count)) - check_for_release(cgrp); - kfree(link); + if (!atomic_dec_and_test(&cg->refcount)) { + write_unlock(&css_set_lock); + return; } - write_unlock(&css_set_lock); - kfree(cg); + hlist_del(&cg->hlist); + css_set_count--; + + write_unlock(&css_set_lock); + call_rcu(&cg->rcu_head, free_css_set_rcu); } static void free_css_set_rcu(struct rcu_head *obj) @@ -1874,6 +1906,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) struct cgroupfs_root *root = cgrp->root; struct cgroup_taskset tset = { }; struct css_set *newcg; + struct css_set *cg; /* @tsk either already exited or can't exit until the end */ if (tsk->flags & PF_EXITING) @@ -1909,15 +1942,20 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) goto out; } + task_lock(tsk); + cg = tsk->cgroups; + get_css_set(cg); + task_unlock(tsk); + cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg); for_each_subsys(root, ss) { if (ss->attach) ss->attach(cgrp, &tset); } - set_bit(CGRP_RELEASABLE, &cgrp->flags); - synchronize_rcu(); + /* put_css_set will not destroy cg until after an RCU grace period */ + put_css_set(cg); /* * wake up rmdir() waiter. the rmdir should fail since the cgroup |