From abaa93d9e1de2c29297e69ddba8ddd38f15064cf Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Thu, 12 Jun 2014 13:30:25 -0700
Subject: rcu: Simplify priority boosting by putting rt_mutex in rcu_node

RCU priority boosting currently checks for boosting via a pointer in
task_struct. However, this is not needed: as Oleg noted, if the rt_mutex
is placed in the rcu_node instead of on the booster's stack, the boostee
can simply check it to see if it owns the lock. This commit makes this
change, shrinking task_struct by one pointer and the kernel by thirteen
lines.

Suggested-by: Oleg Nesterov
Signed-off-by: Paul E. McKenney
---
 include/linux/sched.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 306f4f0c987a..3cfbc05e66e6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1270,9 +1270,6 @@ struct task_struct {
 #ifdef CONFIG_TREE_PREEMPT_RCU
 	struct rcu_node *rcu_blocked_node;
 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-#ifdef CONFIG_RCU_BOOST
-	struct rt_mutex *rcu_boost_mutex;
-#endif /* #ifdef CONFIG_RCU_BOOST */

 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	struct sched_info sched_info;
@@ -2009,9 +2006,6 @@ static inline void rcu_copy_process(struct task_struct *p)
 #ifdef CONFIG_TREE_PREEMPT_RCU
 	p->rcu_blocked_node = NULL;
 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-#ifdef CONFIG_RCU_BOOST
-	p->rcu_boost_mutex = NULL;
-#endif /* #ifdef CONFIG_RCU_BOOST */
 	INIT_LIST_HEAD(&p->rcu_node_entry);
 }

--
cgit v1.2.3
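To make the new scheme concrete: the boostee-side ownership check that
the message describes looks roughly like the sketch below. This is a
simplification, not the exact tree-RCU code; it assumes the rcu_node
structure gained a boost_mtx member as part of this series and elides
the interrupt and lock handling around it.

    /* Sketch: the rt_mutex now lives in the rcu_node itself. */
    struct rcu_node {
            /* ... */
            struct rt_mutex boost_mtx;  /* used while boosting readers */
    };

    static void rcu_read_unlock_special(struct task_struct *t)
    {
            struct rcu_node *rnp = t->rcu_blocked_node;
            bool drop_boost_mutex;

            /* If this task was boosted, it owns the node's rt_mutex. */
            drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;

            /* ... remove t from the blocked-tasks list, report QS ... */

            if (drop_boost_mutex)
                    rt_mutex_unlock(&rnp->boost_mtx);  /* deboost */
    }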
From 466af29bf4270e84261712428a1304c28e3743fa Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Fri, 6 Jun 2014 18:52:06 +0200
Subject: sched/deadline: Kill task_struct->pi_top_task

Remove task_struct->pi_top_task. The only user, rt_mutex_setprio(), can
use a local variable instead.

Signed-off-by: Oleg Nesterov
Signed-off-by: Peter Zijlstra
Cc: Juri Lelli
Cc: Alex Thorlton
Cc: Andrew Morton
Cc: Daeseok Youn
Cc: Dario Faggioli
Cc: Davidlohr Bueso
Cc: David Rientjes
Cc: Eric W. Biederman
Cc: Linus Torvalds
Cc: Matthew Dempsky
Cc: Michal Simek
Cc: Oleg Nesterov
Link: http://lkml.kernel.org/r/20140606165206.GB29465@redhat.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 306f4f0c987a..c9c9ff723525 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1440,8 +1440,6 @@ struct task_struct {
 	struct rb_node *pi_waiters_leftmost;
 	/* Deadlock detection and priority inheritance handling */
 	struct rt_mutex_waiter *pi_blocked_on;
-	/* Top pi_waiters task */
-	struct task_struct *pi_top_task;
 #endif

 #ifdef CONFIG_DEBUG_MUTEXES

--
cgit v1.2.3

From 8875125efe8402c4d84b08291e68f1281baba8e2 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai
Date: Sun, 29 Jun 2014 00:03:57 +0400
Subject: sched: Transform resched_task() into resched_curr()

We always use resched_task() with rq->curr as the argument; it is not
possible to reschedule any task but an rq's current one. This patch
introduces resched_curr(struct rq *) to replace all instances of the
repeating pattern.

The main aim is cleanup, but there is a small size benefit too:

(before)
$ size kernel/sched/built-in.o
   text	   data	    bss	    dec	    hex	filename
 155274	  16445	   7042	 178761	  2ba49	kernel/sched/built-in.o

$ size vmlinux
   text	   data	    bss	    dec	    hex	filename
7411490	1178376	 991232	9581098	 92322a	vmlinux

(after)
$ size kernel/sched/built-in.o
   text	   data	    bss	    dec	    hex	filename
 155130	  16445	   7042	 178617	  2b9b9	kernel/sched/built-in.o

$ size vmlinux
   text	   data	    bss	    dec	    hex	filename
7411362	1178376	 991232	9580970	 9231aa	vmlinux

I was choosing between resched_curr() and resched_rq(), and the first
name looks better to me.

One small inaccuracy remains in Documentation/trace/ftrace.txt: I have
not actually re-collected the tracing output there, in the hope that
the patch won't make execution times much worse. :)

Signed-off-by: Kirill Tkhai
Signed-off-by: Peter Zijlstra
Cc: Linus Torvalds
Cc: Randy Dunlap
Cc: Steven Rostedt
Link: http://lkml.kernel.org/r/20140628200219.1778.18735.stgit@localhost
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c9c9ff723525..41a195385081 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2786,7 +2786,7 @@ static inline bool __must_check current_set_polling_and_test(void)

 	/*
 	 * Polling state must be visible before we test NEED_RESCHED,
-	 * paired by resched_task()
+	 * paired by resched_curr()
 	 */
 	smp_mb__after_atomic();

@@ -2804,7 +2804,7 @@ static inline bool __must_check current_clr_polling_and_test(void)

 	/*
 	 * Polling state must be visible before we test NEED_RESCHED,
-	 * paired by resched_task()
+	 * paired by resched_curr()
 	 */
 	smp_mb__after_atomic();

@@ -2836,7 +2836,7 @@ static inline void current_clr_polling(void)
 	 * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
 	 * fold.
 	 */
-	smp_mb(); /* paired with resched_task() */
+	smp_mb(); /* paired with resched_curr() */

 	preempt_fold_need_resched();
 }

--
cgit v1.2.3
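For reference, a minimal sketch of the new helper, built from the
standard need-resched accessors; the real version also handles kicking
a remote CPU with an IPI and the polling-idle case that the memory
barriers in the hunks above pair with.

    void resched_curr(struct rq *rq)
    {
            struct task_struct *curr = rq->curr;

            lockdep_assert_held(&rq->lock);

            if (test_tsk_need_resched(curr))
                    return;  /* already marked for reschedule */

            set_tsk_need_resched(curr);
            /* ... if rq belongs to another CPU, send a resched IPI ... */
    }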
Signed-off-by: Thomas Gleixner
Signed-off-by: John Stultz
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 306f4f0c987a..67678fa76f99 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1368,7 +1368,7 @@ struct task_struct {
 #endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
 	struct timespec start_time; 		/* monotonic time */
-	struct timespec real_start_time;	/* boot based time */
+	u64 real_start_time;	/* boot based time in nsec */
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
 	unsigned long min_flt, maj_flt;

--
cgit v1.2.3

From ccbf62d8a284cf181ac28c8e8407dd077d90dd4b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 16 Jul 2014 21:04:34 +0000
Subject: sched: Make task->start_time nanoseconds based

Simplify the timespec to nsec/usec conversions.

Signed-off-by: Thomas Gleixner
Signed-off-by: John Stultz
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 67678fa76f99..10c6e829927f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1367,7 +1367,7 @@ struct task_struct {
 	} vtime_snap_whence;
 #endif
 	unsigned long nvcsw, nivcsw; /* context switch counts */
-	struct timespec start_time; 	/* monotonic time */
+	u64 start_time;		/* monotonic time in nsec */
 	u64 real_start_time;	/* boot based time in nsec */
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
 	unsigned long min_flt, maj_flt;

--
cgit v1.2.3
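The consumer side of these two changes lives in kernel/fork.c, outside
this sched.h-only log: with u64 fields, copy_process() can assign the
nanosecond clock values directly instead of filling a struct timespec.
A sketch, assuming the ktime_get_ns()/ktime_get_boot_ns() accessors
introduced by the same timekeeping series:

    /* in copy_process(): */
    p->start_time = ktime_get_ns();            /* monotonic clock, ns */
    p->real_start_time = ktime_get_boot_ns();  /* boot-based clock, ns */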
Wysocki
---
 include/linux/sched.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 10c6e829927f..653744ae8d27 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -813,7 +813,7 @@ struct task_delay_info {
 	 * associated with the operation is added to XXX_delay.
 	 * XXX_delay contains the accumulated delay time in nanoseconds.
 	 */
-	struct timespec blkio_start, blkio_end;	/* Shared by blkio, swapin */
+	u64 blkio_start;	/* Shared by blkio, swapin */
 	u64 blkio_delay;	/* wait for sync block io completion */
 	u64 swapin_delay;	/* wait for swapin block io completion */
 	u32 blkio_count;	/* total count of the number of sync block */
@@ -821,7 +821,7 @@ struct task_delay_info {
 	u32 swapin_count;	/* total count of the number of swapin block */
 				/* io operations performed */

-	struct timespec freepages_start, freepages_end;
+	u64 freepages_start;
 	u64 freepages_delay;	/* wait for memory reclaim */
 	u32 freepages_count;	/* total count of memory reclaim */
 };

--
cgit v1.2.3

From 72f15c03977acc8f06080e6c8a91d93bfc655a65 Mon Sep 17 00:00:00 2001
From: Richard Weinberger
Date: Wed, 5 Mar 2014 15:15:22 +0100
Subject: sas_ss_flags: Remove nested ternary if

...to make it readable.

Signed-off-by: Richard Weinberger
---
 include/linux/sched.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0376b054a0d0..795ea2bc3d4f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2360,8 +2360,10 @@ static inline int on_sig_stack(unsigned long sp)

 static inline int sas_ss_flags(unsigned long sp)
 {
-	return (current->sas_ss_size == 0 ? SS_DISABLE
-		: on_sig_stack(sp) ? SS_ONSTACK : 0);
+	if (!current->sas_ss_size)
+		return SS_DISABLE;
+
+	return on_sig_stack(sp) ? SS_ONSTACK : 0;
 }

 static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)

--
cgit v1.2.3

From 372ba8cb46b271a7662b92cbefedee56725f6bd0 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Wed, 6 Aug 2014 14:19:21 +0100
Subject: cpuidle: menu: Lookup CPU runqueues less

The menu governor makes separate lookups of the CPU runqueue to get the
load and the number of IO waiters, but this can be done with a single
lookup.

Signed-off-by: Mel Gorman
Signed-off-by: Rafael J. Wysocki
---
 include/linux/sched.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 306f4f0c987a..641bd954bb5d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -168,8 +168,7 @@ extern int nr_processes(void);
 extern unsigned long nr_running(void);
 extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
-extern unsigned long this_cpu_load(void);
-
+extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 extern void calc_global_load(unsigned long ticks);
 extern void update_cpu_load_nohz(void);

--
cgit v1.2.3
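The intended caller of get_iowait_load() is the menu governor's
idle-state selection path. A sketch of the usage (the function and
variable names here are illustrative, not the governor's actual code):

    static void menu_update_load(void)
    {
            unsigned long nr_iowaiters, cpu_load;

            /* One runqueue lookup replaces the separate nr_iowait_cpu()
             * and this_cpu_load() calls the governor used to make. */
            get_iowait_load(&nr_iowaiters, &cpu_load);

            /* ... weight the predicted idle time by nr_iowaiters and
             * cpu_load ... */
    }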
Signed-off-by: Johannes Weiner
Acked-by: Michal Hocko
Cc: Hugh Dickins
Cc: Tejun Heo
Cc: Vladimir Davydov
Cc: Naoya Horiguchi
Cc: Vladimir Davydov
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/sched.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7c19d552dc3f..4fcf82a4d243 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1628,12 +1628,6 @@ struct task_struct {
 	unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
 #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
-	struct memcg_batch_info {
-		int do_batch;	/* incremented when batch uncharge started */
-		struct mem_cgroup *memcg; /* target memcg of uncharge */
-		unsigned long nr_pages;	/* uncharged usage */
-		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
-	} memcg_batch;
 	unsigned int memcg_kmem_skip_account;
 	struct memcg_oom_info {
 		struct mem_cgroup *memcg;

--
cgit v1.2.3
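The list-based interface that replaces the per-task batching state is
along these lines (a sketch: mem_cgroup_uncharge_list() is introduced
by this same series outside the sched.h hunk, and the page-freeing
helper shown is assumed from the mm code of that era):

    LIST_HEAD(pages_to_free);

    /* release_pages() and friends already collect the dying pages;
     * hand the whole list to memcg for one batched uncharge. */
    mem_cgroup_uncharge_list(&pages_to_free);
    free_hot_cold_page_list(&pages_to_free, true);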
Signed-off-by: Milton Miller
Signed-off-by: Jack Miller
Cc: Davidlohr Bueso
Cc: Manfred Spraul
Cc: Anton Blanchard
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b21e9218c0fd..db2f6474e95e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -33,6 +33,7 @@ struct sched_param {

 #include
 #include
+#include <linux/shm.h>
 #include
 #include
 #include
@@ -1385,6 +1386,7 @@ struct task_struct {
 #ifdef CONFIG_SYSVIPC
 /* ipc stuff */
 	struct sysv_sem sysvsem;
+	struct sysv_shm sysvshm;
 #endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 /* hung task detection */

--
cgit v1.2.3
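The per-task bookkeeping added above is just a list head; a sketch of
the pieces that live outside sched.h (struct and field names per the
commit, details hedged):

    /* <linux/shm.h> */
    struct sysv_shm {
            struct list_head shm_clist;  /* segments created by this task */
    };

    /* at fork time, e.g. from copy_process(): */
    INIT_LIST_HEAD(&tsk->sysvshm.shm_clist);

With this in place, exit_shm() walks only the exiting task's own
segments instead of every segment in the ipc namespace.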
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Miao Xie
Signed-off-by: Tetsuo Handa
Acked-by: Kees Cook
[ lizf: slightly modified subject and changelog ]
Signed-off-by: Zefan Li
Signed-off-by: Tejun Heo
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c2c885ee52b..45577650f629 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1957,7 +1957,7 @@ static inline void memalloc_noio_restore(unsigned int flags)
 }

 /* Per-process atomic flags. */
-#define PFA_NO_NEW_PRIVS 0x00000001 /* May not gain new privileges. */
+#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */

 static inline bool task_no_new_privs(struct task_struct *p)
 {

--
cgit v1.2.3

From e0e5070b20e01f0321f97db4e4e174f3f6b49e50 Mon Sep 17 00:00:00 2001
From: Zefan Li
Date: Thu, 25 Sep 2014 09:40:40 +0800
Subject: sched: add macros to define bitops for task atomic flags

This will simplify the code when we add new flags.

v3:
- Kees pointed out that no_new_privs should never be cleared, so we
  shouldn't define task_clear_no_new_privs(); we define 3 macros
  instead of a single one.

v2:
- updated scripts/tags.sh, suggested by Peter

Cc: Ingo Molnar
Cc: Miao Xie
Cc: Tetsuo Handa
Acked-by: Peter Zijlstra (Intel)
Acked-by: Kees Cook
Signed-off-by: Zefan Li
Signed-off-by: Tejun Heo
---
 include/linux/sched.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 45577650f629..5630763956d9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1959,15 +1959,18 @@ static inline void memalloc_noio_restore(unsigned int flags)
 /* Per-process atomic flags. */
 #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */

-static inline bool task_no_new_privs(struct task_struct *p)
-{
-	return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
-}
-
-static inline void task_set_no_new_privs(struct task_struct *p)
-{
-	set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
-}
+#define TASK_PFA_TEST(name, func)					\
+	static inline bool task_##func(struct task_struct *p)		\
+	{ return test_bit(PFA_##name, &p->atomic_flags); }
+#define TASK_PFA_SET(name, func)					\
+	static inline void task_set_##func(struct task_struct *p)	\
+	{ set_bit(PFA_##name, &p->atomic_flags); }
+#define TASK_PFA_CLEAR(name, func)					\
+	static inline void task_clear_##func(struct task_struct *p)	\
+	{ clear_bit(PFA_##name, &p->atomic_flags); }
+
+TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
+TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)

 /*
  * task->jobctl flags

--
cgit v1.2.3
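For reference, an invocation such as TASK_PFA_TEST(NO_NEW_PRIVS,
no_new_privs) expands to exactly the inline helper that the old
open-coded version provided:

    static inline bool task_no_new_privs(struct task_struct *p)
    {
            return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
    }

This expansion also shows why the preceding constant fix mattered:
test_bit() and friends take a bit number, so the old 0x00000001 value
silently addressed bit 1 rather than bit 0.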
Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Miao Xie
Cc: Kees Cook
Fixes: 950592f7b991 ("cpusets: update tasks' page/slab spread flags in time")
Cc: # 2.6.31+
Reported-by: Tetsuo Handa
Signed-off-by: Zefan Li
Signed-off-by: Tejun Heo
---
 include/linux/sched.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5630763956d9..7b1cafefb05e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1903,8 +1903,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define PF_KTHREAD	0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
 #define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
-#define PF_SPREAD_PAGE	0x01000000	/* Spread page cache over cpuset */
-#define PF_SPREAD_SLAB	0x02000000	/* Spread some slab caches over cpuset */
 #define PF_NO_SETAFFINITY 0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
 #define PF_MCE_EARLY	0x08000000	/* Early kill for mce process policy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
@@ -1958,6 +1956,9 @@ static inline void memalloc_noio_restore(unsigned int flags)

 /* Per-process atomic flags. */
 #define PFA_NO_NEW_PRIVS 0	/* May not gain new privileges. */
+#define PFA_SPREAD_PAGE  1	/* Spread page cache over cpuset */
+#define PFA_SPREAD_SLAB  2	/* Spread some slab caches over cpuset */

 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
@@ -1972,6 +1973,14 @@ static inline void memalloc_noio_restore(unsigned int flags)
 TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
 TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)

+TASK_PFA_TEST(SPREAD_PAGE, spread_page)
+TASK_PFA_SET(SPREAD_PAGE, spread_page)
+TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
+
+TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
+TASK_PFA_SET(SPREAD_SLAB, spread_slab)
+TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
+
 /*
  * task->jobctl flags
  */

--
cgit v1.2.3
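On the cpuset side, flag updates on other tasks then go through the
generated atomic accessors rather than a non-atomic read-modify-write
of tsk->flags. A sketch (the is_spread_*() helpers are assumed from
kernel/cpuset.c):

    static void cpuset_update_task_spread_flag(struct cpuset *cs,
                                               struct task_struct *tsk)
    {
            if (is_spread_page(cs))
                    task_set_spread_page(tsk);
            else
                    task_clear_spread_page(tsk);

            if (is_spread_slab(cs))
                    task_set_spread_slab(tsk);
            else
                    task_clear_spread_slab(tsk);
    }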