From f01ae9cb0de282abfd20cd3c2e3477adbdb766ce Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov
Date: Thu, 16 Jun 2016 15:57:15 +0300
Subject: sched/fair: Do not announce throttled next buddy in dequeue_task_fair()

commit 754bd598be9bbc953bc709a9e8ed7f3188bfb9d7 upstream.

The hierarchy could already be throttled at this point. A throttled
next buddy could trigger a NULL pointer dereference in
pick_next_task_fair().

Signed-off-by: Konstantin Khlebnikov
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Ben Segall
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/146608183552.21905.15924473394414832071.stgit@buzz
Signed-off-by: Ingo Molnar
Cc: Ben Pineau
Signed-off-by: Greg Kroah-Hartman
---
 kernel/sched/fair.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'kernel/sched/fair.c')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8f258f437ac2..3fa53654b7f2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4233,15 +4233,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
+			/* Avoid re-evaluating load for this entity: */
+			se = parent_entity(se);
 			/*
 			 * Bias pick_next to pick a task from this cfs_rq, as
 			 * p is sleeping when it is within its sched_slice.
 			 */
-			if (task_sleep && parent_entity(se))
-				set_next_buddy(parent_entity(se));
-
-			/* avoid re-evaluating load for this entity */
-			se = parent_entity(se);
+			if (task_sleep && se && !throttled_hierarchy(cfs_rq))
+				set_next_buddy(se);
 			break;
 		}
 		flags |= DEQUEUE_SLEEP;
-- 
cgit v1.2.3
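Note: the failure mode fixed above can be modelled outside the kernel.
What follows is a minimal, self-contained C sketch, not kernel code;
the group/task types and the next_buddy hint are simplified stand-ins
for cfs_rq/sched_entity and the scheduler's buddy logic. It shows why
announcing a buddy from a throttled (already dequeued) hierarchy sets
up a NULL pointer dereference when the picker later follows the stale
hint, and how a throttled_hierarchy() guard avoids it:

#include <stdio.h>
#include <stddef.h>

struct task { const char *name; };

struct group {
	struct task *task;	/* NULL once the group is throttled */
	int throttled;
	struct group *parent;
};

/* Walk upwards: a group is effectively throttled if any ancestor is. */
static int throttled_hierarchy(struct group *g)
{
	for (; g; g = g->parent)
		if (g->throttled)
			return 1;
	return 0;
}

static struct group *next_buddy;	/* the "pick me next" hint */

/* Buggy variant: announces the buddy without checking throttling. */
static void set_buddy_unchecked(struct group *g)
{
	next_buddy = g;
}

/* Fixed variant, mirroring the patch: skip throttled hierarchies. */
static void set_buddy_checked(struct group *g)
{
	if (!throttled_hierarchy(g))
		next_buddy = g;
}

static struct task *pick_next(struct group *fallback)
{
	struct group *g = next_buddy ? next_buddy : fallback;

	return g->task;		/* NULL deref if g was throttled */
}

int main(void)
{
	struct task t = { "worker" };
	struct group root = { &t, 0, NULL };
	struct group child = { &t, 0, &root };

	child.throttled = 1;	/* bandwidth exhausted... */
	child.task = NULL;	/* ...and its entities dequeued */

	set_buddy_checked(&child);	/* fixed: hint is not taken */
	printf("picked: %s\n", pick_next(&root)->name);

	set_buddy_unchecked(&child);	/* buggy: stale hint is taken */
	/* pick_next(&root) would now dereference child.task == NULL */
	return 0;
}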
From ada79b5ecda79ec7b53053d9955a5ee04c8dd633 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov
Date: Thu, 16 Jun 2016 15:57:01 +0300
Subject: sched/fair: Initialize throttle_count for new task-groups lazily

commit 094f469172e00d6ab0a3130b0e01c83b3cf3a98d upstream.

A cgroup created inside a throttled group must inherit the current
throttle_count. A broken throttle_count allows throttled entries to
be nominated as the next buddy, which later leads to a NULL pointer
dereference in pick_next_task_fair().

This patch initializes cfs_rq->throttle_count at the first enqueue:
laziness allows us to skip locking all runqueues at group creation.
The lazy approach also allows skipping a full sub-tree scan when
throttling the hierarchy (not in this patch).

Signed-off-by: Konstantin Khlebnikov
Signed-off-by: Peter Zijlstra (Intel)
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: bsegall@google.com
Link: http://lkml.kernel.org/r/146608182119.21870.8439834428248129633.stgit@buzz
Signed-off-by: Ingo Molnar
Cc: Ben Pineau
Signed-off-by: Greg Kroah-Hartman
---
 kernel/sched/fair.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'kernel/sched/fair.c')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3fa53654b7f2..812069b66f47 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3918,6 +3918,26 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
 	if (!cfs_bandwidth_used())
 		return;
 
+	/* Synchronize hierarchical throttle counter: */
+	if (unlikely(!cfs_rq->throttle_uptodate)) {
+		struct rq *rq = rq_of(cfs_rq);
+		struct cfs_rq *pcfs_rq;
+		struct task_group *tg;
+
+		cfs_rq->throttle_uptodate = 1;
+
+		/* Get closest up-to-date node, because leaves go first: */
+		for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) {
+			pcfs_rq = tg->cfs_rq[cpu_of(rq)];
+			if (pcfs_rq->throttle_uptodate)
+				break;
+		}
+		if (tg) {
+			cfs_rq->throttle_count = pcfs_rq->throttle_count;
+			cfs_rq->throttled_clock_task = rq_clock_task(rq);
+		}
+	}
+
 	/* an active group must be handled by the update_curr()->put() path */
 	if (!cfs_rq->runtime_enabled || cfs_rq->curr)
 		return;
-- 
cgit v1.2.3
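Note: the lazy synchronization the patch above introduces (a node that
has never been touched copies its counter from the closest ancestor
that is already up to date, marking itself up to date first) can also
be sketched in isolation. Again a minimal, self-contained C model with
hypothetical stand-in types, not the kernel's task_group/cfs_rq:

#include <stdio.h>
#include <stddef.h>

struct node {
	int uptodate;		/* has this node ever synchronized? */
	int throttle_count;
	struct node *parent;
};

/* Mirrors the first-enqueue hook: because leaves are enqueued first,
 * the nearest up-to-date ancestor carries the correct count. */
static void sync_throttle_count(struct node *n)
{
	struct node *p;

	if (n->uptodate)
		return;
	n->uptodate = 1;

	for (p = n->parent; p; p = p->parent)
		if (p->uptodate)
			break;
	if (p)
		n->throttle_count = p->throttle_count;
}

int main(void)
{
	/* root was throttled twice; the new subtree knows nothing yet */
	struct node root = { 1, 2, NULL };
	struct node mid  = { 0, 0, &root };
	struct node leaf = { 0, 0, &mid };

	sync_throttle_count(&leaf);	/* walks past mid, inherits 2 */
	sync_throttle_count(&mid);	/* inherits 2 from root */

	printf("leaf=%d mid=%d\n", leaf.throttle_count, mid.throttle_count);
	return 0;
}

Because each node synchronizes at most once, group creation never has
to take every runqueue's lock up front, which is the point of the
patch.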