From 45ceebf77653975815d82fcf7cec0a164215ae11 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 19 Apr 2013 15:10:49 -0400 Subject: sched: Factor out load calculation code from sched/core.c --> sched/proc.c This large chunk of load calculation code can be easily divorced from the main core.c scheduler file, with only a couple prototypes and externs added to a kernel/sched header. Some recent commits expanded the code and the documentation of it, making it large enough to warrant separation. For example, see: 556061b, "sched/nohz: Fix rq->cpu_load[] calculations" 5aaa0b7, "sched/nohz: Fix rq->cpu_load calculations some more" 5167e8d, "sched/nohz: Rewrite and fix load-avg computation -- again" More importantly, it helps reduce the size of the main sched/core.c by yet another significant amount (~600 lines). Signed-off-by: Paul Gortmaker Acked-by: Peter Zijlstra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1366398650-31599-2-git-send-email-paul.gortmaker@windriver.com Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel/sched/sched.h') diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ce39224d6155..a38ee0a0650e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -10,8 +10,16 @@ #include "cpupri.h" #include "cpuacct.h" +struct rq; + extern __read_mostly int scheduler_running; +extern unsigned long calc_load_update; +extern atomic_long_t calc_load_tasks; + +extern long calc_load_fold_active(struct rq *this_rq); +extern void update_cpu_load_active(struct rq *this_rq); + /* * Convert user-nice values [ -20 ... 0 ... 19 ] * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], -- cgit v1.2.3 From 8527632dc95472adb571701e852479531c0567a2 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 19 Apr 2013 15:10:50 -0400 Subject: sched: Move update_load_*() methods from sched.h to fair.c These inlines are only used by kernel/sched/fair.c so they do not need to be present in the main kernel/sched/sched.h file. Signed-off-by: Paul Gortmaker Acked-by: Peter Zijlstra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1366398650-31599-3-git-send-email-paul.gortmaker@windriver.com Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'kernel/sched/sched.h') diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index a38ee0a0650e..f1f6256c1224 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -892,24 +892,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) #define WF_FORK 0x02 /* child wakeup after fork */ #define WF_MIGRATED 0x4 /* internal use, task got migrated */ -static inline void update_load_add(struct load_weight *lw, unsigned long inc) -{ - lw->weight += inc; - lw->inv_weight = 0; -} - -static inline void update_load_sub(struct load_weight *lw, unsigned long dec) -{ - lw->weight -= dec; - lw->inv_weight = 0; -} - -static inline void update_load_set(struct load_weight *lw, unsigned long w) -{ - lw->weight = w; - lw->inv_weight = 0; -} - /* * To aid in avoiding the subversion of "niceness" due to uneven distribution * of tasks with abnormal "nice" values across CPUs the contribution that -- cgit v1.2.3 From c5405a495e88d93cf9b4f4cc91507c7f4afcb901 Mon Sep 17 00:00:00 2001 From: Neil Zhang Date: Thu, 11 Apr 2013 21:04:59 +0800 Subject: sched: Remove redundant update_runtime notifier migration_call() will do all the things that update_runtime() does. So let's remove it. Furthermore, there is potential risk that the current code will catch BUG_ON at line 689 of rt.c when do cpu hotplug while there are realtime threads running because of enabling runtime twice while the rt_runtime may already changed. Signed-off-by: Neil Zhang Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1365685499-26515-1-git-send-email-zhangwm@marvell.com Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/sched/sched.h') diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f1f6256c1224..c806c61a1261 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1041,7 +1041,6 @@ static inline void idle_balance(int cpu, struct rq *rq) extern void sysrq_sched_debug_show(void); extern void sched_init_granularity(void); extern void update_max_interval(void); -extern int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu); extern void init_sched_rt_class(void); extern void init_sched_fair_class(void); -- cgit v1.2.3 From 78becc27097585c6aec7043834cadde950ae79f2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Apr 2013 01:51:02 +0200 Subject: sched: Use an accessor to read the rq clock Read the runqueue clock through an accessor. This prepares for adding a debugging infrastructure to detect missing or redundant calls to update_rq_clock() between a scheduler's entry and exit point. Signed-off-by: Frederic Weisbecker Cc: Li Zhong Cc: Steven Rostedt Cc: Paul Turner Cc: Mike Galbraith Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1365724262-20142-6-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel/sched/sched.h') diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c806c61a1261..74ff659e964f 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -548,6 +548,16 @@ DECLARE_PER_CPU(struct rq, runqueues); #define cpu_curr(cpu) (cpu_rq(cpu)->curr) #define raw_rq() (&__raw_get_cpu_var(runqueues)) +static inline u64 rq_clock(struct rq *rq) +{ + return rq->clock; +} + +static inline u64 rq_clock_task(struct rq *rq) +{ + return rq->clock_task; +} + #ifdef CONFIG_SMP #define rcu_dereference_check_sched_domain(p) \ -- cgit v1.2.3 From e23ee74777f389369431d77390c4b09332ce026a Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 7 Jun 2013 15:37:43 -0400 Subject: sched/rt: Simplify pull_rt_task() logic and remove .leaf_rt_rq_list [ Peter, this is based off of some of my work, I ran it though a few tests and it passed. I also reviewed it, and added my SOB as I am somewhat a co-author to it. ] Based on the patch by Steven Rostedt from previous year: https://lkml.org/lkml/2012/4/18/517 1)Simplify pull_rt_task() logic: search in pushable tasks of dest runqueue. The only pullable tasks are the tasks which are pushable in their local rq, and no others. 2)Remove .leaf_rt_rq_list member of struct rt_rq and functions connected with it: nobody uses it since now. Signed-off-by: Kirill Tkhai Signed-off-by: Steven Rostedt Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/287571370557898@web7d.yandex.ru Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/sched/sched.h') diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 74ff659e964f..029601a61587 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -361,7 +361,6 @@ struct rt_rq { unsigned long rt_nr_boosted; struct rq *rq; - struct list_head leaf_rt_rq_list; struct task_group *tg; #endif }; -- cgit v1.2.3 From 141965c7494d984b2bf24efd361a3125278869c6 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Wed, 26 Jun 2013 13:05:39 +0800 Subject: Revert "sched: Introduce temporary FAIR_GROUP_SCHED dependency for load-tracking" Remove CONFIG_FAIR_GROUP_SCHED that covers the runnable info, then we can use runnable load variables. Also remove 2 CONFIG_FAIR_GROUP_SCHED setting which is not in reverted patch(introduced in 9ee474f), but also need to revert. Signed-off-by: Alex Shi Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/51CA76A3.3050207@intel.com Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'kernel/sched/sched.h') diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 029601a61587..77ce668ba302 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -269,12 +269,6 @@ struct cfs_rq { #endif #ifdef CONFIG_SMP -/* - * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be - * removed when useful for applications beyond shares distribution (e.g. - * load-balance). - */ -#ifdef CONFIG_FAIR_GROUP_SCHED /* * CFS Load tracking * Under CFS, load is tracked on a per-entity basis and aggregated up. @@ -284,9 +278,9 @@ struct cfs_rq { u64 runnable_load_avg, blocked_load_avg; atomic64_t decay_counter, removed_load; u64 last_decay; -#endif /* CONFIG_FAIR_GROUP_SCHED */ -/* These always depend on CONFIG_FAIR_GROUP_SCHED */ + #ifdef CONFIG_FAIR_GROUP_SCHED + /* Required to track per-cpu representation of a task_group */ u32 tg_runnable_contrib; u64 tg_load_contrib; #endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -1027,17 +1021,8 @@ extern void update_group_power(struct sched_domain *sd, int cpu); extern void trigger_load_balance(struct rq *rq, int cpu); extern void idle_balance(int this_cpu, struct rq *this_rq); -/* - * Only depends on SMP, FAIR_GROUP_SCHED may be removed when runnable_avg - * becomes useful in lb - */ -#if defined(CONFIG_FAIR_GROUP_SCHED) extern void idle_enter_fair(struct rq *this_rq); extern void idle_exit_fair(struct rq *this_rq); -#else -static inline void idle_enter_fair(struct rq *this_rq) {} -static inline void idle_exit_fair(struct rq *this_rq) {} -#endif #else /* CONFIG_SMP */ -- cgit v1.2.3 From fa6bddeb14d59d701f846b174b59c9982e926e66 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 20 Jun 2013 10:18:46 +0800 Subject: sched: Move a few runnable tg variables into CONFIG_SMP The following 2 variables are only used under CONFIG_SMP, so its better to move their definiation into CONFIG_SMP too. atomic64_t load_avg; atomic_t runnable_avg; Signed-off-by: Alex Shi Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1371694737-29336-3-git-send-email-alex.shi@intel.com Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched/sched.h') diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 77ce668ba302..31d25f80a7c6 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -149,9 +149,11 @@ struct task_group { unsigned long shares; atomic_t load_weight; +#ifdef CONFIG_SMP atomic64_t load_avg; atomic_t runnable_avg; #endif +#endif #ifdef CONFIG_RT_GROUP_SCHED struct sched_rt_entity **rt_se; -- cgit v1.2.3 From a75cdaa915e42ef0e6f38dc7f2a6a1deca91d648 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 20 Jun 2013 10:18:47 +0800 Subject: sched: Set an initial value of runnable avg for new forked task We need to initialize the se.avg.{decay_count, load_avg_contrib} for a new forked task. Otherwise random values of above variables cause a mess when a new task is enqueued: enqueue_task_fair enqueue_entity enqueue_entity_load_avg and make fork balancing imbalance due to incorrect load_avg_contrib. Further more, Morten Rasmussen notice some tasks were not launched at once after created. So Paul and Peter suggest giving a start value for new task runnable avg time same as sched_slice(). PeterZ said: > So the 'problem' is that our running avg is a 'floating' average; ie. it > decays with time. Now we have to guess about the future of our newly > spawned task -- something that is nigh impossible seeing these CPU > vendors keep refusing to implement the crystal ball instruction. > > So there's two asymptotic cases we want to deal well with; 1) the case > where the newly spawned program will be 'nearly' idle for its lifetime; > and 2) the case where its cpu-bound. > > Since we have to guess, we'll go for worst case and assume its > cpu-bound; now we don't want to make the avg so heavy adjusting to the > near-idle case takes forever. We want to be able to quickly adjust and > lower our running avg. > > Now we also don't want to make our avg too light, such that it gets > decremented just for the new task not having had a chance to run yet -- > even if when it would run, it would be more cpu-bound than not. > > So what we do is we make the initial avg of the same duration as that we > guess it takes to run each task on the system at least once -- aka > sched_slice(). > > Of course we can defeat this with wakeup/fork bombs, but in the 'normal' > case it should be good enough. Paul also contributed most of the code comments in this commit. Signed-off-by: Alex Shi Reviewed-by: Gu Zheng Reviewed-by: Paul Turner [peterz; added explanation of sched_slice() usage] Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1371694737-29336-4-git-send-email-alex.shi@intel.com Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched/sched.h') diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 31d25f80a7c6..9c65d46504b1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1048,6 +1048,8 @@ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime extern void update_idle_cpu_load(struct rq *this_rq); +extern void init_task_runnable_average(struct task_struct *p); + #ifdef CONFIG_PARAVIRT static inline u64 steal_ticks(u64 steal) { -- cgit v1.2.3 From 72a4cf20cb71a327c636c7042fdacc25abffc87c Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 20 Jun 2013 10:18:53 +0800 Subject: sched: Change cfs_rq load avg to unsigned long Since the 'u64 runnable_load_avg, blocked_load_avg' in cfs_rq struct are smaller than 'unsigned long' cfs_rq->load.weight. We don't need u64 vaiables to describe them. unsigned long is more efficient and convenience. Signed-off-by: Alex Shi Reviewed-by: Paul Turner Tested-by: Vincent Guittot Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1371694737-29336-10-git-send-email-alex.shi@intel.com Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched/sched.h') diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9c65d46504b1..9eb12d9edd35 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -277,7 +277,7 @@ struct cfs_rq { * This allows for the description of both thread and group usage (in * the FAIR_GROUP_SCHED case). */ - u64 runnable_load_avg, blocked_load_avg; + unsigned long runnable_load_avg, blocked_load_avg; atomic64_t decay_counter, removed_load; u64 last_decay; -- cgit v1.2.3 From bf5b986ed4d20428eeec3df4a03dbfebb9b6538c Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 20 Jun 2013 10:18:54 +0800 Subject: sched/tg: Use 'unsigned long' for load variable in task group Since tg->load_avg is smaller than tg->load_weight, we don't need a atomic64_t variable for load_avg in 32 bit machine. The same reason for cfs_rq->tg_load_contrib. The atomic_long_t/unsigned long variable type are more efficient and convenience for them. Signed-off-by: Alex Shi Tested-by: Vincent Guittot Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1371694737-29336-11-git-send-email-alex.shi@intel.com Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched/sched.h') diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9eb12d9edd35..5585eb25e9a3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -150,7 +150,7 @@ struct task_group { atomic_t load_weight; #ifdef CONFIG_SMP - atomic64_t load_avg; + atomic_long_t load_avg; atomic_t runnable_avg; #endif #endif @@ -284,7 +284,7 @@ struct cfs_rq { #ifdef CONFIG_FAIR_GROUP_SCHED /* Required to track per-cpu representation of a task_group */ u32 tg_runnable_contrib; - u64 tg_load_contrib; + unsigned long tg_load_contrib; #endif /* CONFIG_FAIR_GROUP_SCHED */ /* -- cgit v1.2.3 From 2509940fd71c2e2915a05052bbdbf2d478364184 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 20 Jun 2013 10:18:55 +0800 Subject: sched/cfs_rq: Change atomic64_t removed_load to atomic_long_t Similar to runnable_load_avg, blocked_load_avg variable, long type is enough for removed_load in 64 bit or 32 bit machine. Then we avoid the expensive atomic64 operations on 32 bit machine. Signed-off-by: Alex Shi Reviewed-by: Paul Turner Tested-by: Vincent Guittot Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1371694737-29336-12-git-send-email-alex.shi@intel.com Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/sched/sched.h') diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 5585eb25e9a3..705991906fbe 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -278,8 +278,9 @@ struct cfs_rq { * the FAIR_GROUP_SCHED case). */ unsigned long runnable_load_avg, blocked_load_avg; - atomic64_t decay_counter, removed_load; + atomic64_t decay_counter; u64 last_decay; + atomic_long_t removed_load; #ifdef CONFIG_FAIR_GROUP_SCHED /* Required to track per-cpu representation of a task_group */ -- cgit v1.2.3 From a9cef46a10cc1b84bf2cdf4060766d858c0439d8 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 20 Jun 2013 10:18:56 +0800 Subject: sched/tg: Remove tg.load_weight Since no one use it. Signed-off-by: Alex Shi Reviewed-by: Paul Turner Tested-by: Vincent Guittot Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1371694737-29336-13-git-send-email-alex.shi@intel.com Signed-off-by: Ingo Molnar --- kernel/sched/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/sched/sched.h') diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 705991906fbe..ef0a7b2439dd 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -148,7 +148,6 @@ struct task_group { struct cfs_rq **cfs_rq; unsigned long shares; - atomic_t load_weight; #ifdef CONFIG_SMP atomic_long_t load_avg; atomic_t runnable_avg; -- cgit v1.2.3