From 5091faa449ee0b7d73bc296a93bca9540fc51d0a Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 30 Nov 2010 14:18:03 +0100 Subject: sched: Add 'autogroup' scheduling feature: automated per session task groups A recurring complaint from CFS users is that parallel kbuild has a negative impact on desktop interactivity. This patch implements an idea from Linus, to automatically create task groups. Currently, only per session autogroups are implemented, but the patch leaves the way open for enhancement. Implementation: each task's signal struct contains an inherited pointer to a refcounted autogroup struct containing a task group pointer, the default for all tasks pointing to the init_task_group. When a task calls setsid(), a new task group is created, the process is moved into the new task group, and a reference to the preveious task group is dropped. Child processes inherit this task group thereafter, and increase it's refcount. When the last thread of a process exits, the process's reference is dropped, such that when the last process referencing an autogroup exits, the autogroup is destroyed. At runqueue selection time, IFF a task has no cgroup assignment, its current autogroup is used. Autogroup bandwidth is controllable via setting it's nice level through the proc filesystem: cat /proc//autogroup Displays the task's group and the group's nice level. echo > /proc//autogroup Sets the task group's shares to the weight of nice task. Setting nice level is rate limited for !admin users due to the abuse risk of task group locking. The feature is enabled from boot by default if CONFIG_SCHED_AUTOGROUP=y is selected, but can be disabled via the boot option noautogroup, and can also be turned on/off on the fly via: echo [01] > /proc/sys/kernel/sched_autogroup_enabled ... which will automatically move tasks to/from the root task group. Signed-off-by: Mike Galbraith Acked-by: Linus Torvalds Acked-by: Peter Zijlstra Cc: Markus Trippelsdorf Cc: Mathieu Desnoyers Cc: Paul Turner Cc: Oleg Nesterov [ Removed the task_group_path() debug code, and fixed !EVENTFD build failure. ] Signed-off-by: Ingo Molnar LKML-Reference: <1290281700.28711.9.camel@maggy.simson.net> Signed-off-by: Ingo Molnar --- kernel/fork.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 3b159c5991b7..b6f2475f1e83 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig) static inline void put_signal_struct(struct signal_struct *sig) { - if (atomic_dec_and_test(&sig->sigcnt)) + if (atomic_dec_and_test(&sig->sigcnt)) { + sched_autogroup_exit(sig); free_signal_struct(sig); + } } void __put_task_struct(struct task_struct *tsk) @@ -904,6 +906,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) posix_cpu_timers_init_group(sig); tty_audit_fork(sig); + sched_autogroup_fork(sig); sig->oom_adj = current->signal->oom_adj; sig->oom_score_adj = current->signal->oom_score_adj; -- cgit v1.2.3 From f26f9aff6aaf67e9a430d16c266f91b13a5bff64 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 8 Dec 2010 11:05:42 +0100 Subject: Sched: fix skip_clock_update optimization idle_balance() drops/retakes rq->lock, leaving the previous task vulnerable to set_tsk_need_resched(). Clear it after we return from balancing instead, and in setup_thread_stack() as well, so no successfully descheduled or never scheduled task has it set. Need resched confused the skip_clock_update logic, which assumes that the next call to update_rq_clock() will come nearly immediately after being set. Make the optimization robust against the waking a sleeper before it sucessfully deschedules case by checking that the current task has not been dequeued before setting the flag, since it is that useless clock update we're trying to save, and clear unconditionally in schedule() proper instead of conditionally in put_prev_task(). Signed-off-by: Mike Galbraith Reported-by: Bjoern B. Brandenburg Tested-by: Yong Zhang Signed-off-by: Peter Zijlstra Cc: stable@kernel.org LKML-Reference: <1291802742.1417.9.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- kernel/fork.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 3b159c5991b7..5447dc7defa9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -273,6 +273,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); + clear_tsk_need_resched(tsk); stackend = end_of_stack(tsk); *stackend = STACK_END_MAGIC; /* for overflow detection */ -- cgit v1.2.3 From 909ea96468096b07fbb41aaf69be060d92bd9271 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 8 Dec 2010 16:22:55 +0100 Subject: core: Replace __get_cpu_var with __this_cpu_read if not used for an address. __get_cpu_var() can be replaced with this_cpu_read and will then use a single read instruction with implied address calculation to access the correct per cpu instance. However, the address of a per cpu variable passed to __this_cpu_read() cannot be determined (since it's an implied address conversion through segment prefixes). Therefore apply this only to uses of __get_cpu_var where the address of the variable is not used. Cc: Pekka Enberg Cc: Hugh Dickins Cc: Thomas Gleixner Acked-by: H. Peter Anvin Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 3b159c5991b7..e05e27de67df 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1282,7 +1282,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); - __get_cpu_var(process_counts)++; + __this_cpu_inc(process_counts); } attach_pid(p, PIDTYPE_PID, pid); nr_threads++; -- cgit v1.2.3 From 101e5f77bf35679809586e250b6c62193d2ed179 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 31 Dec 2010 09:32:30 +0100 Subject: sched, autogroup: Fix reference leak The cgroup exit mess also uncovered a struct autogroup reference leak. copy_process() was simply freeing vs putting the signal_struct, stranding a reference. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Oleg Nesterov LKML-Reference: <1293784350.6839.2.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index b6f2475f1e83..067244495966 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1317,7 +1317,7 @@ bad_fork_cleanup_mm: } bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) - free_signal_struct(p->signal); + put_signal_struct(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: -- cgit v1.2.3 From 1c5354de90c900b369e2ebd36c3a065ede29eb93 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 5 Jan 2011 11:16:04 +0100 Subject: sched: Move sched_autogroup_exit() to free_signal_struct() Per Oleg's suggestion, undo fork failure free/put_signal_struct change, and move sched_autogroup_exit() to free_signal_struct() instead. Signed-off-by: Mike Galbraith Reviewed-by: Oleg Nesterov Signed-off-by: Peter Zijlstra LKML-Reference: <1294222564.8369.6.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- kernel/fork.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 7d164e25b0f0..dc1a8bbcea7b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -169,15 +169,14 @@ EXPORT_SYMBOL(free_task); static inline void free_signal_struct(struct signal_struct *sig) { taskstats_tgid_free(sig); + sched_autogroup_exit(sig); kmem_cache_free(signal_cachep, sig); } static inline void put_signal_struct(struct signal_struct *sig) { - if (atomic_dec_and_test(&sig->sigcnt)) { - sched_autogroup_exit(sig); + if (atomic_dec_and_test(&sig->sigcnt)) free_signal_struct(sig); - } } void __put_task_struct(struct task_struct *tsk) @@ -1318,7 +1317,7 @@ bad_fork_cleanup_mm: } bad_fork_cleanup_signal: if (!(clone_flags & CLONE_THREAD)) - put_signal_struct(p->signal); + free_signal_struct(p->signal); bad_fork_cleanup_sighand: __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: -- cgit v1.2.3