From 3bfdbefc2cf0ecf2933250813c356b3d147e59e0 Mon Sep 17 00:00:00 2001
From: Sai Gurrappadi
Date: Tue, 21 Jan 2014 16:41:37 -0800
Subject: sched: Force sleep on consecutive sched_yields

If a task sched_yields to itself continuously, force the task to sleep
in sched_yield. This lowers the CPU load of the task, thereby lowering
the CPU frequency and improving power.

Added a stat variable to track how many times we sleep due to these
consecutive sched_yields. Also added sysctl knobs to control the number
of consecutive sched_yields before the sleep kicks in and the duration
of the sleep in us.

Bug 1424617

Change-Id: Ie92412b8b900365816e17237fcbd0aac6e9c94ce
Signed-off-by: Sai Gurrappadi
Reviewed-on: http://git-master/r/358455
Reviewed-by: Wen Yi
Reviewed-by: Peter Zu
GVS: Gerrit_Virtual_Submit
Reviewed-by: Diwakar Tundlam
---
 kernel/sched/core.c  | 24 ++++++++++++++++++++++--
 kernel/sched/debug.c |  1 +
 kernel/sched/sched.h |  3 +++
 kernel/sched/stats.c |  5 +++--
 kernel/sysctl.c      | 14 ++++++++++++++
 5 files changed, 43 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 62cb6b24ab46..825447720620 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -297,7 +297,18 @@ __read_mostly int scheduler_running;
  */
 int sysctl_sched_rt_runtime = 950000;
 
+/*
+ * Number of sched_yield calls that result in a thread yielding
+ * to itself before a sleep is injected in its next sched_yield call.
+ * Setting this to -1 will disable adding sleep in sched_yield.
+ */
+const_debug int sysctl_sched_yield_sleep_threshold = 4;
+
+/*
+ * Sleep duration in us used when sched_yield_sleep_threshold
+ * is exceeded.
+ */
+const_debug unsigned int sysctl_sched_yield_sleep_duration = 50;
 
 /*
  * __task_rq_lock - lock the rq @p resides on.
@@ -3035,6 +3046,7 @@ need_resched:
 	if (likely(prev != next)) {
 		rq->nr_switches++;
 		rq->curr = next;
+		prev->yield_count = 0;
 		++*switch_count;
 
 		context_switch(rq, prev, next); /* unlocks the rq */
@@ -3046,8 +3058,10 @@ need_resched:
 		 */
 		cpu = smp_processor_id();
 		rq = cpu_rq(cpu);
-	} else
+	} else {
+		prev->yield_count++;
 		raw_spin_unlock_irq(&rq->lock);
+	}
 
 	post_schedule(rq);
 
@@ -4352,6 +4366,8 @@ SYSCALL_DEFINE0(sched_yield)
 	struct rq *rq = this_rq_lock();
 
 	schedstat_inc(rq, yld_count);
+	if (rq->curr->yield_count == sysctl_sched_yield_sleep_threshold)
+		schedstat_inc(rq, yield_sleep_count);
 	current->sched_class->yield_task(rq);
 
 	/*
@@ -4363,7 +4379,11 @@ SYSCALL_DEFINE0(sched_yield)
 	do_raw_spin_unlock(&rq->lock);
 	sched_preempt_enable_no_resched();
 
-	schedule();
+	if (rq->curr->yield_count == sysctl_sched_yield_sleep_threshold)
+		usleep_range(sysctl_sched_yield_sleep_duration,
+			     sysctl_sched_yield_sleep_duration + 5);
+	else
+		schedule();
 
 	return 0;
 }
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 75024a673520..068ad55aa641 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -302,6 +302,7 @@ do {									\
 #define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
 
 	P(yld_count);
+	P(yield_sleep_count);
 
 	P(sched_count);
 	P(sched_goidle);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 35bd8b7f3a87..0fc275c70d7d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -511,6 +511,7 @@ struct rq {
 
 	/* sys_sched_yield() stats */
 	unsigned int yld_count;
+	unsigned int yield_sleep_count;
 
 	/* schedule() stats */
 	unsigned int sched_count;
@@ -1143,6 +1144,8 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 extern const_debug unsigned int sysctl_sched_time_avg;
 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
+extern const_debug unsigned int sysctl_sched_yield_sleep_duration;
+extern const_debug int sysctl_sched_yield_sleep_threshold;
 
 static inline u64 sched_avg_period(void)
 {
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index da98af347e8b..dff505e53f7f 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -35,12 +35,13 @@ static int show_schedstat(struct seq_file *seq, void *v)
 
 		/* runqueue-specific stats */
 		seq_printf(seq,
-		    "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
+		    "cpu%d %u 0 %u %u %u %u %llu %llu %lu %u",
 		    cpu, rq->yld_count,
 		    rq->sched_count, rq->sched_goidle,
 		    rq->ttwu_count, rq->ttwu_local,
 		    rq->rq_cpu_time,
-		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
+		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount,
+		    rq->yield_sleep_count);
 
 		seq_printf(seq, "\n");
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2ba77228ff97..207454a598f4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -312,6 +312,20 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &min_wakeup_granularity_ns,
 		.extra2		= &max_wakeup_granularity_ns,
 	},
+	{
+		.procname	= "sched_yield_sleep_threshold",
+		.data		= &sysctl_sched_yield_sleep_threshold,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_yield_sleep_duration",
+		.data		= &sysctl_sched_yield_sleep_duration,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #ifdef CONFIG_SMP
 	{
 		.procname	= "sched_tunable_scaling",
--
cgit v1.2.3
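
A quick way to exercise the change (this tester is not part of the patch;
the file name and iteration count are arbitrary): on a kernel carrying
this change, a thread that keeps yielding back to itself on an otherwise
idle CPU should pick up roughly sysctl_sched_yield_sleep_duration
microseconds of injected sleep each time its consecutive-yield count
reaches sched_yield_sleep_threshold, so the loop below should run
measurably slower, and at lower CPU load, than on an unpatched kernel.
The knobs land under /proc/sys/kernel/ since the ctl_table entries are
added to kern_table, and yield_sleep_count is appended as a new column
on each cpu line of /proc/schedstat.

/* yield_spin.c - hypothetical tester, not part of this patch.
 *
 * Build:  gcc -O2 -o yield_spin yield_spin.c -lrt
 * Tune:   echo 4  > /proc/sys/kernel/sched_yield_sleep_threshold
 *         echo 50 > /proc/sys/kernel/sched_yield_sleep_duration
 *         echo -1 > /proc/sys/kernel/sched_yield_sleep_threshold  (disable)
 */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec start, end;
	long i, iters = 1000000;

	clock_gettime(CLOCK_MONOTONIC, &start);
	/*
	 * With no other runnable task on this CPU, each sched_yield()
	 * reschedules the same thread, so the task's yield_count keeps
	 * growing and the kernel periodically sleeps instead of
	 * calling schedule().
	 */
	for (i = 0; i < iters; i++)
		sched_yield();
	clock_gettime(CLOCK_MONOTONIC, &end);

	printf("%ld yields in %.3f s\n", iters,
	       (end.tv_sec - start.tv_sec) +
	       (end.tv_nsec - start.tv_nsec) / 1e9);
	return 0;
}

Note that the sleep fires only when yield_count is exactly equal to the
threshold, and the usleep_range() switches the task out, which resets
yield_count in schedule(); with the default threshold of 4, the sleep
therefore recurs roughly once every five consecutive sched_yield calls
rather than on every call.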