From 164c33c6adee609b8b9062cce4c10f764d0dce13 Mon Sep 17 00:00:00 2001 From: Salman Qazi Date: Mon, 25 Jun 2012 18:18:15 -0700 Subject: sched: Fix fork() error path to not crash In dup_task_struct(), if arch_dup_task_struct() fails, the clean up code fails to clean up correctly. That's because the clean up code depends on unininitalized ti->task pointer. We fix this by making sure that the task and thread_info know about each other before we attempt to take the error path. Signed-off-by: Salman Qazi Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20120626011815.11323.5533.stgit@dungbeetle.mtv.corp.google.com Signed-off-by: Ingo Molnar --- kernel/fork.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index ab5211b9e622..f00e319d8376 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -304,12 +304,17 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) } err = arch_dup_task_struct(tsk, orig); - if (err) - goto out; + /* + * We defer looking at err, because we will need this setup + * for the clean up path to work correctly. + */ tsk->stack = ti; - setup_thread_stack(tsk, orig); + + if (err) + goto out; + clear_user_return_notifier(tsk); clear_tsk_need_resched(tsk); stackend = end_of_stack(tsk); -- cgit v1.2.3 From 158e1645e07f3e9f7e4962d7a0997f5c3b98311b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jun 2012 09:24:13 +0400 Subject: trim task_work: get rid of hlist layout based on Oleg's suggestion; single-linked list, task->task_works points to the last element, forward pointer from said last element points to head. I'd still prefer much more regular scheme with two pointers in task_work, but... Signed-off-by: Al Viro --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index ab5211b9e622..bebabad59202 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1415,7 +1415,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); - INIT_HLIST_HEAD(&p->task_works); + p->task_works = NULL; /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible -- cgit v1.2.3 From b2412b7fa7a3816fa8633dc2ff19f1a90aabe423 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Mon, 30 Jul 2012 14:42:30 -0700 Subject: fork: use vma_pages() to simplify the code The current code can be replaced by vma_pages(). So use it to simplify the code. [akpm@linux-foundation.org: initialise `len' at its definition site] Signed-off-by: Huang Shijie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index ff1cad3b7bdc..2c1802948a38 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -391,8 +391,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) } charge = 0; if (mpnt->vm_flags & VM_ACCOUNT) { - unsigned long len; - len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + unsigned long len = vma_pages(mpnt); + if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ goto fail_nomem; charge = len; -- cgit v1.2.3 From 87bec58a52652e2eb2a575692a40f9466c7bd31b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 30 Jul 2012 14:42:31 -0700 Subject: revert "sched: Fix fork() error path to not crash" To make way for "fork: fix error handling in dup_task()", which fixes the errors more completely. Cc: Salman Qazi Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 2c1802948a38..088025bd1925 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -304,17 +304,12 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) } err = arch_dup_task_struct(tsk, orig); - - /* - * We defer looking at err, because we will need this setup - * for the clean up path to work correctly. - */ - tsk->stack = ti; - setup_thread_stack(tsk, orig); - if (err) goto out; + tsk->stack = ti; + + setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); clear_tsk_need_resched(tsk); stackend = end_of_stack(tsk); -- cgit v1.2.3 From f19b9f74b7ea3b21ddcee55d852a6488239608a4 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 30 Jul 2012 14:42:33 -0700 Subject: fork: fix error handling in dup_task() The function dup_task() may fail at the following function calls in the following order. 0) alloc_task_struct_node() 1) alloc_thread_info_node() 2) arch_dup_task_struct() Error by 0) is not a matter, it can just return. But error by 1) requires releasing task_struct allocated by 0) before it returns. Likewise, error by 2) requires releasing task_struct and thread_info allocated by 0) and 1). The existing error handling calls free_task_struct() and free_thread_info() which do not only release task_struct and thread_info, but also call architecture specific arch_release_task_struct() and arch_release_thread_info(). The problem is that task_struct and thread_info are not fully initialized yet at this point, but arch_release_task_struct() and arch_release_thread_info() are called with them. For example, x86 defines its own arch_release_task_struct() that releases a task_xstate. If alloc_thread_info_node() fails in dup_task(), arch_release_task_struct() is called with task_struct which is just allocated and filled with garbage in this error handling. This actually happened with tools/testing/fault-injection/failcmd.sh # env FAILCMD_TYPE=fail_page_alloc \ ./tools/testing/fault-injection/failcmd.sh --times=100 \ --min-order=0 --ignore-gfp-wait=0 \ -- make -C tools/testing/selftests/ run_tests In order to fix this issue, make free_{task_struct,thread_info}() not to call arch_release_{task_struct,thread_info}() and call arch_release_{task_struct,thread_info}() implicitly where needed. Default arch_release_task_struct() and arch_release_thread_info() are defined as empty by default. So this change only affects the architectures which implement their own arch_release_task_struct() or arch_release_thread_info() as listed below. arch_release_task_struct(): x86, sh arch_release_thread_info(): mn10300, tile Signed-off-by: Akinobu Mita Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: David Howells Cc: Koichi Yasutake Cc: Paul Mundt Cc: Chris Metcalf Cc: Salman Qazi Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 088025bd1925..8efac1fe56bc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -114,6 +114,10 @@ int nr_processes(void) return total; } +void __weak arch_release_task_struct(struct task_struct *tsk) +{ +} + #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR static struct kmem_cache *task_struct_cachep; @@ -122,17 +126,17 @@ static inline struct task_struct *alloc_task_struct_node(int node) return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node); } -void __weak arch_release_task_struct(struct task_struct *tsk) { } - static inline void free_task_struct(struct task_struct *tsk) { - arch_release_task_struct(tsk); kmem_cache_free(task_struct_cachep, tsk); } #endif +void __weak arch_release_thread_info(struct thread_info *ti) +{ +} + #ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR -void __weak arch_release_thread_info(struct thread_info *ti) { } /* * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a @@ -150,7 +154,6 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, static inline void free_thread_info(struct thread_info *ti) { - arch_release_thread_info(ti); free_pages((unsigned long)ti, THREAD_SIZE_ORDER); } # else @@ -164,7 +167,6 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, static void free_thread_info(struct thread_info *ti) { - arch_release_thread_info(ti); kmem_cache_free(thread_info_cache, ti); } @@ -205,10 +207,12 @@ static void account_kernel_stack(struct thread_info *ti, int account) void free_task(struct task_struct *tsk) { account_kernel_stack(tsk->stack, -1); + arch_release_thread_info(tsk->stack); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); put_seccomp_filter(tsk); + arch_release_task_struct(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -298,14 +302,12 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) return NULL; ti = alloc_thread_info_node(tsk, node); - if (!ti) { - free_task_struct(tsk); - return NULL; - } + if (!ti) + goto free_tsk; err = arch_dup_task_struct(tsk, orig); if (err) - goto out; + goto free_ti; tsk->stack = ti; @@ -333,8 +335,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) return tsk; -out: +free_ti: free_thread_info(ti); +free_tsk: free_task_struct(tsk); return NULL; } -- cgit v1.2.3 From 44de9d0cad41f2c51ef26916842be046b582dcc9 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Tue, 31 Jul 2012 16:41:49 -0700 Subject: mm: account the total_vm in the vm_stat_account() vm_stat_account() accounts the shared_vm, stack_vm and reserved_vm now. But we can also account for total_vm in the vm_stat_account() which makes the code tidy. Even for mprotect_fixup(), we can get the right result in the end. Signed-off-by: Huang Shijie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 8efac1fe56bc..aaa8813c45d1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -381,10 +381,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) struct file *file; if (mpnt->vm_flags & VM_DONTCOPY) { - long pages = vma_pages(mpnt); - mm->total_vm -= pages; vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file, - -pages); + -vma_pages(mpnt)); continue; } charge = 0; -- cgit v1.2.3 From c255a458055e459f65eb7b7f51dc5dbdd0caf1d8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 31 Jul 2012 16:43:02 -0700 Subject: memcg: rename config variables Sanity: CONFIG_CGROUP_MEM_RES_CTLR -> CONFIG_MEMCG CONFIG_CGROUP_MEM_RES_CTLR_SWAP -> CONFIG_MEMCG_SWAP CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED -> CONFIG_MEMCG_SWAP_ENABLED CONFIG_CGROUP_MEM_RES_CTLR_KMEM -> CONFIG_MEMCG_KMEM [mhocko@suse.cz: fix missed bits] Cc: Glauber Costa Acked-by: Michal Hocko Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Tejun Heo Cc: Aneesh Kumar K.V Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index aaa8813c45d1..3bd2280d79f6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1306,7 +1306,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif -- cgit v1.2.3