summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2016-04-21 19:06:48 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2016-05-04 14:48:49 -0700
commitd52097476caeb14f4d7e3417dda08220d2813cc4 (patch)
treee7fdd0d15a0833977162fe99e2aea17cce43b5a1 /kernel
parenta4e25ff31103e7c9084904418cb95596e3e9d9cf (diff)
cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback
commit 5cf1cacb49aee39c3e02ae87068fc3c6430659b0 upstream. Since e93ad19d0564 ("cpuset: make mm migration asynchronous"), cpuset kicks off asynchronous NUMA node migration if necessary during task migration and flushes it from cpuset_post_attach_flush() which is called at the end of __cgroup_procs_write(). This is to avoid performing migration with cgroup_threadgroup_rwsem write-locked which can lead to deadlock through dependency on kworker creation. memcg has a similar issue with charge moving, so let's convert it to an official callback rather than the current one-off cpuset specific function. This patch adds cgroup_subsys->post_attach callback and makes cpuset register cpuset_post_attach_flush() as its ->post_attach. The conversion is mostly one-to-one except that the new callback is called under cgroup_mutex. This is to guarantee that no other migration operations are started before ->post_attach callbacks are finished. cgroup_mutex is one of the outermost mutex in the system and has never been and shouldn't be a problem. We can add specialized synchronization around __cgroup_procs_write() but I don't think there's any noticeable benefit. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Li Zefan <lizefan@huawei.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c7
-rw-r--r--kernel/cpuset.c4
2 files changed, 7 insertions, 4 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index dc94f8beb097..b0ea3aebc05a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2721,9 +2721,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off, bool threadgroup)
{
struct task_struct *tsk;
+ struct cgroup_subsys *ss;
struct cgroup *cgrp;
pid_t pid;
- int ret;
+ int ssid, ret;
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL;
@@ -2771,8 +2772,10 @@ out_unlock_rcu:
rcu_read_unlock();
out_unlock_threadgroup:
percpu_up_write(&cgroup_threadgroup_rwsem);
+ for_each_subsys(ss, ssid)
+ if (ss->post_attach)
+ ss->post_attach();
cgroup_kn_unlock(of->kn);
- cpuset_post_attach_flush();
return ret ?: nbytes;
}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2ade632197d5..11eaf14b52c2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -57,7 +57,6 @@
#include <asm/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
-#include <linux/workqueue.h>
#include <linux/cgroup.h>
#include <linux/wait.h>
@@ -1015,7 +1014,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
}
}
-void cpuset_post_attach_flush(void)
+static void cpuset_post_attach(void)
{
flush_workqueue(cpuset_migrate_mm_wq);
}
@@ -2083,6 +2082,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.can_attach = cpuset_can_attach,
.cancel_attach = cpuset_cancel_attach,
.attach = cpuset_attach,
+ .post_attach = cpuset_post_attach,
.bind = cpuset_bind,
.legacy_cftypes = files,
.early_init = 1,