From 77e4ef99d1c596a31747668e5fd837f77b6349b6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 12 Dec 2011 18:12:21 -0800 Subject: threadgroup: extend threadgroup_lock() to cover exit and exec threadgroup_lock() protected only protected against new addition to the threadgroup, which was inherently somewhat incomplete and problematic for its only user cgroup. On-going migration could race against exec and exit leading to interesting problems - the symmetry between various attach methods, task exiting during method execution, ->exit() racing against attach methods, migrating task switching basic properties during exec and so on. This patch extends threadgroup_lock() such that it protects against all three threadgroup altering operations - fork, exit and exec. For exit, threadgroup_change_begin/end() calls are added to exit_signals around assertion of PF_EXITING. For exec, threadgroup_[un]lock() are updated to also grab and release cred_guard_mutex. With this change, threadgroup_lock() guarantees that the target threadgroup will remain stable - no new task will be added, no new PF_EXITING will be set and exec won't happen. The next patch will update cgroup so that it can take full advantage of this change. -v2: beefed up comment as suggested by Frederic. -v3: narrowed scope of protection in exit path as suggested by Frederic. Signed-off-by: Tejun Heo Reviewed-by: KAMEZAWA Hiroyuki Acked-by: Li Zefan Acked-by: Frederic Weisbecker Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Menage Cc: Linus Torvalds --- kernel/signal.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index b3f78d09a105..399c184bf0ae 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2359,8 +2359,15 @@ void exit_signals(struct task_struct *tsk) int group_stop = 0; sigset_t unblocked; + /* + * @tsk is about to have PF_EXITING set - lock out users which + * expect stable threadgroup. + */ + threadgroup_change_begin(tsk); + if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { tsk->flags |= PF_EXITING; + threadgroup_change_end(tsk); return; } @@ -2370,6 +2377,9 @@ void exit_signals(struct task_struct *tsk) * see wants_signal(), do_signal_stop(). */ tsk->flags |= PF_EXITING; + + threadgroup_change_end(tsk); + if (!signal_pending(tsk)) goto out; -- cgit v1.2.3 From 648616343cdbe904c585a6c12e323d3b3c72e46f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 15 Dec 2011 14:56:09 +0100 Subject: [S390] cputime: add sparse checking and cleanup Make cputime_t and cputime64_t nocast to enable sparse checking to detect incorrect use of cputime. Drop the cputime macros for simple scalar operations. The conversion macros are still needed. Signed-off-by: Martin Schwidefsky --- kernel/signal.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index b3f78d09a105..739ef2bf105c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1629,10 +1629,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig) info.si_uid = __task_cred(tsk)->uid; rcu_read_unlock(); - info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime, - tsk->signal->utime)); - info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime, - tsk->signal->stime)); + info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime); + info.si_stime = cputime_to_clock_t(tsk->stime + tsk->signal->stime); info.si_status = tsk->exit_code & 0x7f; if (tsk->exit_code & 0x80) -- cgit v1.2.3 From 8a88951b5878dc475dcd841cefc767e36397d14e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 4 Jan 2012 17:29:20 +0100 Subject: ptrace: ensure JOBCTL_STOP_SIGMASK is not zero after detach This is the temporary simple fix for 3.2, we need more changes in this area. 1. do_signal_stop() assumes that the running untraced thread in the stopped thread group is not possible. This was our goal but it is not yet achieved: a stopped-but-resumed tracee can clone the running thread which can initiate another group-stop. Remove WARN_ON_ONCE(!current->ptrace). 2. A new thread always starts with ->jobctl = 0. If it is auto-attached and this group is stopped, __ptrace_unlink() sets JOBCTL_STOP_PENDING but JOBCTL_STOP_SIGMASK part is zero, this triggers WANR_ON(!signr) in do_jobctl_trap() if another debugger attaches. Change __ptrace_unlink() to set the artificial SIGSTOP for report. Alternatively we could change ptrace_init_task() to copy signr from current, but this means we can copy it for no reason and hide the possible similar problems. Acked-by: Tejun Heo Cc: [3.1] Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/signal.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index b3f78d09a105..206551563cce 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1994,8 +1994,6 @@ static bool do_signal_stop(int signr) */ if (!(sig->flags & SIGNAL_STOP_STOPPED)) sig->group_exit_code = signr; - else - WARN_ON_ONCE(!current->ptrace); sig->group_stop_count = 0; -- cgit v1.2.3 From 5e6292c0f28f03dfdb8ea3d685f0b838a23bfba4 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 10 Jan 2012 15:11:17 -0800 Subject: signal: add block_sigmask() for adding sigmask to current->blocked Abstract the code sequence for adding a signal handler's sa_mask to current->blocked because the sequence is identical for all architectures. Furthermore, in the past some architectures actually got this code wrong, so introduce a wrapper that all architectures can use. Signed-off-by: Matt Fleming Signed-off-by: Oleg Nesterov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Tejun Heo Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index bb0efa5705ed..d532f1709fbf 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2318,6 +2318,27 @@ relock: return signr; } +/** + * block_sigmask - add @ka's signal mask to current->blocked + * @ka: action for @signr + * @signr: signal that has been successfully delivered + * + * This function should be called when a signal has succesfully been + * delivered. It adds the mask of signals for @ka to current->blocked + * so that they are blocked during the execution of the signal + * handler. In addition, @signr will be blocked unless %SA_NODEFER is + * set in @ka->sa.sa_flags. + */ +void block_sigmask(struct k_sigaction *ka, int signr) +{ + sigset_t blocked; + + sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); + if (!(ka->sa.sa_flags & SA_NODEFER)) + sigaddset(&blocked, signr); + set_current_blocked(&blocked); +} + /* * It could be that complete_signal() picked us to notify about the * group-wide signal. Other threads should be notified now to take -- cgit v1.2.3 From 6b550f9495947fc279d12c38feaf98500e8d0646 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Tue, 10 Jan 2012 15:11:37 -0800 Subject: user namespace: make signal.c respect user namespaces ipc/mqueue.c: for __SI_MESQ, convert the uid being sent to recipient's user namespace. (new, thanks Oleg) __send_signal: convert current's uid to the recipient's user namespace for any siginfo which is not SI_FROMKERNEL (patch from Oleg, thanks again :) do_notify_parent and do_notify_parent_cldstop: map task's uid to parent's user namespace ptrace_signal maps parent's uid into current's user namespace before including in signal to current. IIUC Oleg has argued that this shouldn't matter as the debugger will play with it, but it seems like not converting the value currently being set is misleading. Changelog: Sep 20: Inspired by Oleg's suggestion, define map_cred_ns() helper to simplify callers and help make clear what we are translating (which uid into which namespace). Passing the target task would make callers even easier to read, but we pass in user_ns because current_user_ns() != task_cred_xxx(current, user_ns). Sep 20: As recommended by Oleg, also put task_pid_vnr() under rcu_read_lock in ptrace_signal(). Sep 23: In send_signal(), detect when (user) signal is coming from an ancestor or unrelated user namespace. Pass that on to __send_signal, which sets si_uid to 0 or overflowuid if needed. Oct 12: Base on Oleg's fixup_uid() patch. On top of that, handle all SI_FROMKERNEL cases at callers, because we can't assume sender is current in those cases. Nov 10: (mhelsley) rename fixup_uid to more meaningful usern_fixup_signal_uid Nov 10: (akpm) make the !CONFIG_USER_NS case clearer Signed-off-by: Serge Hallyn Cc: Oleg Nesterov Cc: Matt Helsley Cc: "Eric W. Biederman" From: Serge Hallyn Subject: __send_signal: pass q->info, not info, to userns_fixup_signal_uid (v2) Eric Biederman pointed out that passing info is a bug and could lead to a NULL pointer deref to boot. A collection of signal, securebits, filecaps, cap_bounds, and a few other ltp tests passed with this kernel. Changelog: Nov 18: previous patch missed a leading '&' Signed-off-by: Serge Hallyn Cc: "Eric W. Biederman" From: Dan Carpenter Subject: ipc/mqueue: lock() => unlock() typo There was a double lock typo introduced in b085f4bd6b21 "user namespace: make signal.c respect user namespaces" Signed-off-by: Dan Carpenter Cc: Oleg Nesterov Cc: Matt Helsley Cc: "Eric W. Biederman" Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index d532f1709fbf..c73c4284160e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -28,6 +28,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -1019,6 +1020,34 @@ static inline int legacy_queue(struct sigpending *signals, int sig) return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); } +/* + * map the uid in struct cred into user namespace *ns + */ +static inline uid_t map_cred_ns(const struct cred *cred, + struct user_namespace *ns) +{ + return user_ns_map_uid(ns, cred, cred->uid); +} + +#ifdef CONFIG_USER_NS +static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_struct *t) +{ + if (current_user_ns() == task_cred_xxx(t, user_ns)) + return; + + if (SI_FROMKERNEL(info)) + return; + + info->si_uid = user_ns_map_uid(task_cred_xxx(t, user_ns), + current_cred(), info->si_uid); +} +#else +static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_struct *t) +{ + return; +} +#endif + static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, int group, int from_ancestor_ns) { @@ -1088,6 +1117,9 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, q->info.si_pid = 0; break; } + + userns_fixup_signal_uid(&q->info, t); + } else if (!is_si_special(info)) { if (sig >= SIGRTMIN && info->si_code != SI_USER) { /* @@ -1626,7 +1658,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig) */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); - info.si_uid = __task_cred(tsk)->uid; + info.si_uid = map_cred_ns(__task_cred(tsk), + task_cred_xxx(tsk->parent, user_ns)); rcu_read_unlock(); info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime); @@ -1709,7 +1742,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); - info.si_uid = __task_cred(tsk)->uid; + info.si_uid = map_cred_ns(__task_cred(tsk), + task_cred_xxx(parent, user_ns)); rcu_read_unlock(); info.si_utime = cputime_to_clock_t(tsk->utime); @@ -2125,8 +2159,11 @@ static int ptrace_signal(int signr, siginfo_t *info, info->si_signo = signr; info->si_errno = 0; info->si_code = SI_USER; + rcu_read_lock(); info->si_pid = task_pid_vnr(current->parent); - info->si_uid = task_uid(current->parent); + info->si_uid = map_cred_ns(__task_cred(current->parent), + current_user_ns()); + rcu_read_unlock(); } /* If the (new) signal is now blocked, requeue it. */ -- cgit v1.2.3