author    Sai Gurrappadi <sgurrappadi@nvidia.com>    2014-01-21 16:41:37 -0800
committer Diwakar Tundlam <dtundlam@nvidia.com>      2014-03-03 19:38:54 -0800
commit    3bfdbefc2cf0ecf2933250813c356b3d147e59e0 (patch)
tree      de830b9368a6ac8b3788fbb47054b7633771c8a0 /kernel
parent    33db1f7eb8e2d9eaad2dce65152cd5b2a4a27fae (diff)
sched: Force sleep on consecutive sched_yields
If a task sched_yields to itself continuously, force the task to sleep in sched_yield. This lowers the CPU load of the task, thereby lowering the CPU frequency and improving power.

Added a stat variable to track how many times we sleep due to these consecutive sched_yields. Also added sysctl knobs to control the number of consecutive sched_yields after which the sleep kicks in, and the duration of the sleep in us.

Bug 1424617

Change-Id: Ie92412b8b900365816e17237fcbd0aac6e9c94ce
Signed-off-by: Sai Gurrappadi <sgurrappadi@nvidia.com>
Reviewed-on: http://git-master/r/358455
Reviewed-by: Wen Yi <wyi@nvidia.com>
Reviewed-by: Peter Zu <pzu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Diwakar Tundlam <dtundlam@nvidia.com>
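As a rough illustration of the workload this change targets (this program is not part of the patch), a single thread spinning on sched_yield() on an otherwise idle CPU keeps yielding back to itself; once its consecutive self-yield count reaches the threshold (default 4), the kernel injects a short sleep instead of rescheduling:

#include <sched.h>
#include <stdio.h>

int main(void)
{
	unsigned long i;

	/* Each iteration yields; with no other runnable task on this
	 * CPU the scheduler picks us again, so the per-task yield
	 * count grows and the injected sleep kicks in past the
	 * threshold. */
	for (i = 0; i < 1000000UL; i++)
		sched_yield();

	printf("completed %lu yields\n", i);
	return 0;
}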
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/core.c    24
-rw-r--r--  kernel/sched/debug.c    1
-rw-r--r--  kernel/sched/sched.h    3
-rw-r--r--  kernel/sched/stats.c    5
-rw-r--r--  kernel/sysctl.c        14
5 files changed, 43 insertions, 4 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 62cb6b24ab46..825447720620 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -297,7 +297,18 @@ __read_mostly int scheduler_running;
*/
int sysctl_sched_rt_runtime = 950000;
+/*
+ * Number of sched_yield calls that result in a thread yielding
+ * to itself before a sleep is injected in its next sched_yield call
+ * Setting this to -1 will disable adding sleep in sched_yield
+ */
+const_debug int sysctl_sched_yield_sleep_threshold = 4;
+/*
+ * Sleep duration in us used when sched_yield_sleep_threshold
+ * is exceeded.
+ */
+const_debug unsigned int sysctl_sched_yield_sleep_duration = 50;
/*
* __task_rq_lock - lock the rq @p resides on.
@@ -3035,6 +3046,7 @@ need_resched:
if (likely(prev != next)) {
rq->nr_switches++;
rq->curr = next;
+ prev->yield_count = 0;
++*switch_count;
context_switch(rq, prev, next); /* unlocks the rq */
@@ -3046,8 +3058,10 @@ need_resched:
*/
cpu = smp_processor_id();
rq = cpu_rq(cpu);
- } else
+ } else {
+ prev->yield_count++;
raw_spin_unlock_irq(&rq->lock);
+ }
post_schedule(rq);
@@ -4352,6 +4366,8 @@ SYSCALL_DEFINE0(sched_yield)
struct rq *rq = this_rq_lock();
schedstat_inc(rq, yld_count);
+ if (rq->curr->yield_count == sysctl_sched_yield_sleep_threshold)
+ schedstat_inc(rq, yield_sleep_count);
current->sched_class->yield_task(rq);
/*
@@ -4363,7 +4379,11 @@ SYSCALL_DEFINE0(sched_yield)
do_raw_spin_unlock(&rq->lock);
sched_preempt_enable_no_resched();
- schedule();
+ if (rq->curr->yield_count == sysctl_sched_yield_sleep_threshold)
+ usleep_range(sysctl_sched_yield_sleep_duration,
+ sysctl_sched_yield_sleep_duration + 5);
+ else
+ schedule();
return 0;
}
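The hunks above reset a per-task counter (prev->yield_count) whenever a real context switch happens and increment it when schedule() picks the same task again; sched_yield() then sleeps instead of rescheduling once the counter equals the threshold. A minimal userspace model of that decision, with hypothetical names standing in for the kernel state:

#include <unistd.h>

#define YIELD_SLEEP_THRESHOLD	4	/* sysctl_sched_yield_sleep_threshold */
#define YIELD_SLEEP_DURATION_US	50	/* sysctl_sched_yield_sleep_duration */

static int yield_count;	/* per-task (prev->yield_count) in the patch */

/* schedule() picked a different task: the streak is broken. */
static void on_context_switch(void)
{
	yield_count = 0;
}

/* schedule() picked the same task again: extend the streak. */
static void on_self_yield(void)
{
	yield_count++;
}

/* Tail of sys_sched_yield(): sleep instead of schedule() at the threshold. */
static void sched_yield_tail(void)
{
	if (yield_count == YIELD_SLEEP_THRESHOLD)
		usleep(YIELD_SLEEP_DURATION_US);	/* kernel uses usleep_range() */
	/* else: fall through to schedule() in the kernel */
}

int main(void)
{
	int i;

	/* Six consecutive self-yields: the sleep fires on the fifth. */
	for (i = 0; i < 6; i++) {
		on_self_yield();
		sched_yield_tail();
	}
	on_context_switch();
	return 0;
}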
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 75024a673520..068ad55aa641 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -302,6 +302,7 @@ do { \
#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
P(yld_count);
+ P(yield_sleep_count);
P(sched_count);
P(sched_goidle);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 35bd8b7f3a87..0fc275c70d7d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -511,6 +511,7 @@ struct rq {
/* sys_sched_yield() stats */
unsigned int yld_count;
+ unsigned int yield_sleep_count;
/* schedule() stats */
unsigned int sched_count;
@@ -1143,6 +1144,8 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
extern const_debug unsigned int sysctl_sched_time_avg;
extern const_debug unsigned int sysctl_sched_nr_migrate;
extern const_debug unsigned int sysctl_sched_migration_cost;
+extern const_debug unsigned int sysctl_sched_yield_sleep_duration;
+extern const_debug int sysctl_sched_yield_sleep_threshold;
static inline u64 sched_avg_period(void)
{
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index da98af347e8b..dff505e53f7f 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -35,12 +35,13 @@ static int show_schedstat(struct seq_file *seq, void *v)
/* runqueue-specific stats */
seq_printf(seq,
- "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
+ "cpu%d %u 0 %u %u %u %u %llu %llu %lu %u",
cpu, rq->yld_count,
rq->sched_count, rq->sched_goidle,
rq->ttwu_count, rq->ttwu_local,
rq->rq_cpu_time,
- rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
+ rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount,
+ rq->yield_sleep_count);
seq_printf(seq, "\n");
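With this change the per-cpu line in /proc/schedstat grows a trailing yield_sleep_count field. A short reader sketch (assuming exactly the field layout printed above; header and domain lines are skipped because the sscanf match fails on them):

#include <stdio.h>

int main(void)
{
	char line[512];
	FILE *f = fopen("/proc/schedstat", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		unsigned int cpu, yld, zero, sched_cnt, goidle;
		unsigned int ttwu, ttwu_local, sleep_cnt;
		unsigned long long cpu_time, run_delay;
		unsigned long pcount;

		if (sscanf(line, "cpu%u %u %u %u %u %u %u %llu %llu %lu %u",
			   &cpu, &yld, &zero, &sched_cnt, &goidle, &ttwu,
			   &ttwu_local, &cpu_time, &run_delay, &pcount,
			   &sleep_cnt) == 11)
			printf("cpu%u yield_sleep_count=%u\n", cpu, sleep_cnt);
	}
	fclose(f);
	return 0;
}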
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2ba77228ff97..207454a598f4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -312,6 +312,20 @@ static struct ctl_table kern_table[] = {
.extra1 = &min_wakeup_granularity_ns,
.extra2 = &max_wakeup_granularity_ns,
},
+ {
+ .procname = "sched_yield_sleep_threshold",
+ .data = &sysctl_sched_yield_sleep_threshold,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_yield_sleep_duration",
+ .data = &sysctl_sched_yield_sleep_duration,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_SMP
{
.procname = "sched_tunable_scaling",