Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditfilter.c         |   2
-rw-r--r--  kernel/exit.c                |  31
-rw-r--r--  kernel/fork.c                |   9
-rw-r--r--  kernel/futex.c               | 269
-rw-r--r--  kernel/futex_compat.c        |   9
-rw-r--r--  kernel/irq/spurious.c        |  46
-rw-r--r--  kernel/kallsyms.c            |   3
-rw-r--r--  kernel/kthread.c             |   7
-rw-r--r--  kernel/power/disk.c          |   3
-rw-r--r--  kernel/power/main.c          |  19
-rw-r--r--  kernel/power/process.c       |  57
-rw-r--r--  kernel/power/swap.c          |   2
-rw-r--r--  kernel/profile.c             |   1
-rw-r--r--  kernel/rtmutex.c             |  24
-rw-r--r--  kernel/sched.c               |   4
-rw-r--r--  kernel/signal.c              |  38
-rw-r--r--  kernel/sysctl.c              |   2
-rw-r--r--  kernel/time/clocksource.c    |  10
-rw-r--r--  kernel/time/ntp.c            |   2
-rw-r--r--  kernel/time/tick-broadcast.c |  17
-rw-r--r--  kernel/time/tick-sched.c     |  28
-rw-r--r--  kernel/time/timekeeping.c    |   2
-rw-r--r--  kernel/time/timer_stats.c    |  44
-rw-r--r--  kernel/timer.c               |  12
-rw-r--r--  kernel/workqueue.c           |  84
25 files changed, 457 insertions(+), 268 deletions(-)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 6c61263ff96d..74cc0fc6bb81 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -311,6 +311,7 @@ int audit_match_class(int class, unsigned syscall)
return classes[class][AUDIT_WORD(syscall)] & AUDIT_BIT(syscall);
}
+#ifdef CONFIG_AUDITSYSCALL
static inline int audit_match_class_bits(int class, u32 *mask)
{
int i;
@@ -347,6 +348,7 @@ static int audit_match_signal(struct audit_entry *entry)
return 1;
}
}
+#endif
/* Common user-space to kernel rule translation. */
static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
diff --git a/kernel/exit.c b/kernel/exit.c
index c6d14b8008dd..5c8ecbaa19a5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -762,11 +762,8 @@ static void exit_notify(struct task_struct *tsk)
read_lock(&tasklist_lock);
spin_lock_irq(&tsk->sighand->siglock);
for (t = next_thread(tsk); t != tsk; t = next_thread(t))
- if (!signal_pending(t) && !(t->flags & PF_EXITING)) {
- recalc_sigpending_tsk(t);
- if (signal_pending(t))
- signal_wake_up(t, 0);
- }
+ if (!signal_pending(t) && !(t->flags & PF_EXITING))
+ recalc_sigpending_and_wake(t);
spin_unlock_irq(&tsk->sighand->siglock);
read_unlock(&tasklist_lock);
}
@@ -895,13 +892,29 @@ fastcall NORET_TYPE void do_exit(long code)
if (unlikely(tsk->flags & PF_EXITING)) {
printk(KERN_ALERT
"Fixing recursive fault but reboot is needed!\n");
+ /*
+ * We can do this unlocked here. The futex code uses
+ * this flag just to verify whether the pi state
+ * cleanup has been done or not. In the worst case it
+ * loops once more. We pretend that the cleanup was
+ * done as there is no way to return. Either the
+ * OWNER_DIED bit is set by now or we push the blocked
+ * task into the wait-forever nirvana as well.
+ */
+ tsk->flags |= PF_EXITPIDONE;
if (tsk->io_context)
exit_io_context();
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
}
+ /*
+ * tsk->flags are checked in the futex code to protect against
+ * an exiting task cleaning up the robust pi futexes.
+ */
+ spin_lock_irq(&tsk->pi_lock);
tsk->flags |= PF_EXITING;
+ spin_unlock_irq(&tsk->pi_lock);
if (unlikely(in_atomic()))
printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -915,7 +928,7 @@ fastcall NORET_TYPE void do_exit(long code)
}
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
- hrtimer_cancel(&tsk->signal->real_timer);
+ hrtimer_cancel(&tsk->signal->real_timer);
exit_itimers(tsk->signal);
}
acct_collect(code, group_dead);
@@ -968,6 +981,12 @@ fastcall NORET_TYPE void do_exit(long code)
* Make sure we are holding no locks:
*/
debug_check_no_locks_held(tsk);
+ /*
+ * We can do this unlocked here. The futex code uses this flag
+ * just to verify whether the pi state cleanup has been done
+ * or not. In the worst case it loops once more.
+ */
+ tsk->flags |= PF_EXITPIDONE;
if (tsk->io_context)
exit_io_context();
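
The two PF_EXITPIDONE assignments above, together with the pi_lock-protected setting of PF_EXITING, form a small protocol with the futex code. The reader side is a sketch of what lands in lookup_pi_state() further down in this series (a fragment, not standalone code):

	spin_lock_irq(&p->pi_lock);
	if (p->flags & PF_EXITING) {
		/* -ESRCH once the pi state cleanup is done,
		 * -EAGAIN while the exit is still in flight: */
		ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
	}
	spin_unlock_irq(&p->pi_lock);
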
diff --git a/kernel/fork.c b/kernel/fork.c
index 49530e40ea8b..73ad5cda1bcd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -45,6 +45,7 @@
#include <linux/acct.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
+#include <linux/freezer.h>
#include <linux/delayacct.h>
#include <linux/taskstats_kern.h>
#include <linux/random.h>
@@ -1405,7 +1406,9 @@ long do_fork(unsigned long clone_flags,
}
if (clone_flags & CLONE_VFORK) {
+ freezer_do_not_count();
wait_for_completion(&vfork);
+ freezer_count();
if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
current->ptrace_message = nr;
ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
@@ -1427,10 +1430,8 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep,
{
struct sighand_struct *sighand = data;
- if (flags & SLAB_CTOR_CONSTRUCTOR) {
- spin_lock_init(&sighand->siglock);
- INIT_LIST_HEAD(&sighand->signalfd_list);
- }
+ spin_lock_init(&sighand->siglock);
+ INIT_LIST_HEAD(&sighand->signalfd_list);
}
void __init proc_caches_init(void)
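
freezer_do_not_count()/freezer_count() bracket the vfork wait so the freezer does not deadlock on a parent stuck in wait_for_completion(). In the freezer.h of this era they are roughly the following; treat the exact PF_FREEZER_SKIP flag name as an assumption:

static inline void freezer_do_not_count(void)
{
	/* the freezer may count us as already frozen */
	current->flags |= PF_FREEZER_SKIP;
}

static inline void freezer_count(void)
{
	current->flags &= ~PF_FREEZER_SKIP;
	try_to_freeze();	/* catch up on a freeze request we skipped */
}
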
diff --git a/kernel/futex.c b/kernel/futex.c
index b7ce15c67e32..3b7f7713d9a4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -430,10 +430,6 @@ static struct task_struct * futex_find_get_task(pid_t pid)
p = NULL;
goto out_unlock;
}
- if (p->exit_state != 0) {
- p = NULL;
- goto out_unlock;
- }
get_task_struct(p);
out_unlock:
rcu_read_unlock();
@@ -502,7 +498,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
struct futex_q *this, *next;
struct plist_head *head;
struct task_struct *p;
- pid_t pid;
+ pid_t pid = uval & FUTEX_TID_MASK;
head = &hb->chain;
@@ -520,6 +516,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
return -EINVAL;
WARN_ON(!atomic_read(&pi_state->refcount));
+ WARN_ON(pid && pi_state->owner &&
+ pi_state->owner->pid != pid);
atomic_inc(&pi_state->refcount);
*ps = pi_state;
@@ -530,15 +528,33 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
/*
* We are the first waiter - try to look up the real owner and attach
- * the new pi_state to it, but bail out when the owner died bit is set
- * and TID = 0:
+ * the new pi_state to it, but bail out when TID = 0
*/
- pid = uval & FUTEX_TID_MASK;
- if (!pid && (uval & FUTEX_OWNER_DIED))
+ if (!pid)
return -ESRCH;
p = futex_find_get_task(pid);
- if (!p)
- return -ESRCH;
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ /*
+ * We need to look at the task state flags to figure out
+ * whether the task is exiting. To protect against the do_exit
+ * change of the task flags, we do this protected by
+ * p->pi_lock:
+ */
+ spin_lock_irq(&p->pi_lock);
+ if (unlikely(p->flags & PF_EXITING)) {
+ /*
+ * The task is on the way out. When PF_EXITPIDONE is
+ * set, we know that the task has finished the
+ * cleanup:
+ */
+ int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
+
+ spin_unlock_irq(&p->pi_lock);
+ put_task_struct(p);
+ return ret;
+ }
pi_state = alloc_pi_state();
@@ -551,7 +567,6 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
/* Store the key for possible exit cleanups: */
pi_state->key = *key;
- spin_lock_irq(&p->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &p->pi_state_list);
pi_state->owner = p;
@@ -618,6 +633,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
* preserve the owner died bit.)
*/
if (!(uval & FUTEX_OWNER_DIED)) {
+ int ret = 0;
+
newval = FUTEX_WAITERS | new_owner->pid;
/* Keep the FUTEX_WAITER_REQUEUED flag if it was set */
newval |= (uval & FUTEX_WAITER_REQUEUED);
@@ -625,10 +642,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
pagefault_disable();
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
pagefault_enable();
+
if (curval == -EFAULT)
- return -EFAULT;
+ ret = -EFAULT;
if (curval != uval)
- return -EINVAL;
+ ret = -EINVAL;
+ if (ret) {
+ spin_unlock(&pi_state->pi_mutex.wait_lock);
+ return ret;
+ }
}
spin_lock_irq(&pi_state->owner->pi_lock);
@@ -1174,7 +1196,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
#ifdef CONFIG_DEBUG_PI_LIST
this->list.plist.lock = &hb2->lock;
#endif
- }
+ }
this->key = key2;
get_futex_key_refs(&key2);
drop_count++;
@@ -1326,12 +1348,10 @@ static void unqueue_me_pi(struct futex_q *q)
/*
* Fixup the pi_state owner with current.
*
- * The cur->mm semaphore must be held, it is released at return of this
- * function.
+ * Must be called with hash bucket lock held and mm->sem held for non
+ * private futexes.
*/
-static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared,
- struct futex_q *q,
- struct futex_hash_bucket *hb,
+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
struct task_struct *curr)
{
u32 newtid = curr->pid | FUTEX_WAITERS;
@@ -1355,23 +1375,24 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared,
list_add(&pi_state->list, &curr->pi_state_list);
spin_unlock_irq(&curr->pi_lock);
- /* Unqueue and drop the lock */
- unqueue_me_pi(q);
- if (fshared)
- up_read(fshared);
/*
* We own it, so we have to replace the pending owner
* TID. This must be atomic as we have to preserve the
* owner died bit here.
*/
- ret = get_user(uval, uaddr);
+ ret = get_futex_value_locked(&uval, uaddr);
+
while (!ret) {
newval = (uval & FUTEX_OWNER_DIED) | newtid;
newval |= (uval & FUTEX_WAITER_REQUEUED);
+
+ pagefault_disable();
curval = futex_atomic_cmpxchg_inatomic(uaddr,
uval, newval);
+ pagefault_enable();
+
if (curval == -EFAULT)
- ret = -EFAULT;
+ ret = -EFAULT;
if (curval == uval)
break;
uval = curval;
@@ -1553,10 +1574,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
*/
uaddr = q.pi_state->key.uaddr;
- /* mmap_sem and hash_bucket lock are unlocked at
- return of this function */
- ret = fixup_pi_state_owner(uaddr, fshared,
- &q, hb, curr);
+ ret = fixup_pi_state_owner(uaddr, &q, curr);
} else {
/*
* Catch the rare case, where the lock was released
@@ -1567,12 +1585,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
if (rt_mutex_trylock(&q.pi_state->pi_mutex))
ret = 0;
}
- /* Unqueue and drop the lock */
- unqueue_me_pi(&q);
- if (fshared)
- up_read(fshared);
}
+ /* Unqueue and drop the lock */
+ unqueue_me_pi(&q);
+ if (fshared)
+ up_read(fshared);
+
debug_rt_mutex_free_waiter(&q.waiter);
return ret;
@@ -1688,7 +1707,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
struct futex_hash_bucket *hb;
u32 uval, newval, curval;
struct futex_q q;
- int ret, lock_held, attempt = 0;
+ int ret, lock_taken, ownerdied = 0, attempt = 0;
if (refill_pi_state_cache())
return -ENOMEM;
@@ -1709,10 +1728,11 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
if (unlikely(ret != 0))
goto out_release_sem;
+ retry_unlocked:
hb = queue_lock(&q, -1, NULL);
retry_locked:
- lock_held = 0;
+ ret = lock_taken = 0;
/*
* To avoid races, we attempt to take the lock here again
@@ -1728,43 +1748,44 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
if (unlikely(curval == -EFAULT))
goto uaddr_faulted;
- /* We own the lock already */
+ /*
+ * Detect deadlocks. In case of REQUEUE_PI this is a valid
+ * situation and we return success to user space.
+ */
if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
- if (!detect && 0)
- force_sig(SIGKILL, current);
- /*
- * Normally, this check is done in user space.
- * In case of requeue, the owner may attempt to lock this futex,
- * even if the ownership has already been given by the previous
- * waker.
- * In the usual case, this is a case of deadlock, but not in case
- * of REQUEUE_PI.
- */
if (!(curval & FUTEX_WAITER_REQUEUED))
ret = -EDEADLK;
goto out_unlock_release_sem;
}
/*
- * Surprise - we got the lock. Just return
- * to userspace:
+ * Surprise - we got the lock. Just return to userspace:
*/
if (unlikely(!curval))
goto out_unlock_release_sem;
uval = curval;
+
/*
- * In case of a requeue, check if there already is an owner
- * If not, just take the futex.
+ * Set the WAITERS flag, so the owner will know it has someone
+ * to wake at next unlock
*/
- if ((curval & FUTEX_WAITER_REQUEUED) && !(curval & FUTEX_TID_MASK)) {
- /* set current as futex owner */
- newval = curval | current->pid;
- lock_held = 1;
- } else
- /* Set the WAITERS flag, so the owner will know it has someone
- to wake at next unlock */
- newval = curval | FUTEX_WAITERS;
+ newval = curval | FUTEX_WAITERS;
+
+ /*
+ * There are two cases where a futex might have no owner (the
+ * owner TID is 0): OWNER_DIED or REQUEUE. We take over the
+ * futex in this case. We also do an unconditional takeover
+ * when the owner of the futex died.
+ *
+ * This is safe as we are protected by the hash bucket lock!
+ */
+ if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
+ /* Keep the OWNER_DIED and REQUEUE bits */
+ newval = (curval & ~FUTEX_TID_MASK) | current->pid;
+ ownerdied = 0;
+ lock_taken = 1;
+ }
pagefault_disable();
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
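
The take-over above is plain bit surgery on the futex word. A self-contained userspace illustration, assuming the bit values from the <linux/futex.h> of this period:

#include <stdio.h>
#include <stdint.h>

#define FUTEX_WAITERS    0x80000000u
#define FUTEX_OWNER_DIED 0x40000000u
#define FUTEX_TID_MASK   0x3fffffffu

int main(void)
{
	uint32_t curval = FUTEX_OWNER_DIED;	/* owner died, TID == 0 */
	uint32_t mytid = 4711;			/* hypothetical current->pid */

	/* The take-over from the hunk above: keep OWNER_DIED (and the
	 * requeue bit), install our TID in the TID field: */
	uint32_t newval = (curval & ~FUTEX_TID_MASK) | mytid;

	printf("curval=%#x newval=%#x tid=%u\n", (unsigned)curval,
	       (unsigned)newval, (unsigned)(newval & FUTEX_TID_MASK));
	return 0;
}
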
@@ -1775,8 +1796,13 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
if (unlikely(curval != uval))
goto retry_locked;
- if (lock_held) {
- set_pi_futex_owner(hb, &q.key, curr);
+ /*
+ * We took the lock due to requeue or owner-died takeover.
+ */
+ if (unlikely(lock_taken)) {
+ /* For requeue we need to fixup the pi_futex */
+ if (curval & FUTEX_WAITER_REQUEUED)
+ set_pi_futex_owner(hb, &q.key, curr);
goto out_unlock_release_sem;
}
@@ -1787,34 +1813,40 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
if (unlikely(ret)) {
- /*
- * There were no waiters and the owner task lookup
- * failed. When the OWNER_DIED bit is set, then we
- * know that this is a robust futex and we actually
- * take the lock. This is safe as we are protected by
- * the hash bucket lock. We also set the waiters bit
- * unconditionally here, to simplify glibc handling of
- * multiple tasks racing to acquire the lock and
- * cleanup the problems which were left by the dead
- * owner.
- */
- if (curval & FUTEX_OWNER_DIED) {
- uval = newval;
- newval = current->pid |
- FUTEX_OWNER_DIED | FUTEX_WAITERS;
+ switch (ret) {
- pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr,
- uval, newval);
- pagefault_enable();
+ case -EAGAIN:
+ /*
+ * Task is exiting and we just wait for the
+ * exit to complete.
+ */
+ queue_unlock(&q, hb);
+ if (fshared)
+ up_read(fshared);
+ cond_resched();
+ goto retry;
- if (unlikely(curval == -EFAULT))
+ case -ESRCH:
+ /*
+ * No owner found for this futex. Check if the
+ * OWNER_DIED bit is set to figure out whether
+ * this is a robust futex or not.
+ */
+ if (get_futex_value_locked(&curval, uaddr))
goto uaddr_faulted;
- if (unlikely(curval != uval))
+
+ /*
+ * We simply start over in case of a robust
+ * futex. The code above will take the futex
+ * and return happy.
+ */
+ if (curval & FUTEX_OWNER_DIED) {
+ ownerdied = 1;
goto retry_locked;
- ret = 0;
+ }
+ default:
+ goto out_unlock_release_sem;
}
- goto out_unlock_release_sem;
}
/*
@@ -1845,31 +1877,42 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
down_read(fshared);
spin_lock(q.lock_ptr);
- /*
- * Got the lock. We might not be the anticipated owner if we
- * did a lock-steal - fix up the PI-state in that case.
- */
- if (!ret && q.pi_state->owner != curr)
- /* mmap_sem is unlocked at return of this function */
- ret = fixup_pi_state_owner(uaddr, fshared, &q, hb, curr);
- else {
+ if (!ret) {
+ /*
+ * Got the lock. We might not be the anticipated owner
+ * if we did a lock-steal - fix up the PI-state in
+ * that case:
+ */
+ if (q.pi_state->owner != curr)
+ ret = fixup_pi_state_owner(uaddr, &q, curr);
+ } else {
/*
* Catch the rare case, where the lock was released
- * when we were on the way back before we locked
- * the hash bucket.
+ * when we were on the way back before we locked the
+ * hash bucket.
*/
- if (ret && q.pi_state->owner == curr) {
- if (rt_mutex_trylock(&q.pi_state->pi_mutex))
- ret = 0;
+ if (q.pi_state->owner == curr &&
+ rt_mutex_trylock(&q.pi_state->pi_mutex)) {
+ ret = 0;
+ } else {
+ /*
+ * Paranoia check. If we did not take the lock
+ * in the trylock above, then we should not be
+ * the owner of the rtmutex, neither the real
+ * nor the pending one:
+ */
+ if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
+ printk(KERN_ERR "futex_lock_pi: ret = %d "
+ "pi-mutex: %p pi-state %p\n", ret,
+ q.pi_state->pi_mutex.owner,
+ q.pi_state->owner);
}
- /* Unqueue and drop the lock */
- unqueue_me_pi(&q);
- if (fshared)
- up_read(fshared);
}
- if (!detect && ret == -EDEADLK && 0)
- force_sig(SIGKILL, current);
+ /* Unqueue and drop the lock */
+ unqueue_me_pi(&q);
+ if (fshared)
+ up_read(fshared);
return ret != -EINTR ? ret : -ERESTARTNOINTR;
@@ -1887,16 +1930,19 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
* non-atomically. Therefore, if get_user below is not
* enough, we need to handle the fault ourselves, while
* still holding the mmap_sem.
+ *
+ * ... and hb->lock. :-) --ANK
*/
+ queue_unlock(&q, hb);
+
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr, fshared,
attempt);
if (ret)
- goto out_unlock_release_sem;
- goto retry_locked;
+ goto out_release_sem;
+ goto retry_unlocked;
}
- queue_unlock(&q, hb);
if (fshared)
up_read(fshared);
@@ -1940,9 +1986,9 @@ retry:
goto out;
hb = hash_futex(&key);
+retry_unlocked:
spin_lock(&hb->lock);
-retry_locked:
/*
* To avoid races, try to do the TID -> 0 atomic transition
* again. If it succeeds then we can return without waking
@@ -2005,16 +2051,19 @@ pi_faulted:
* non-atomically. Therefore, if get_user below is not
* enough, we need to handle the fault ourselves, while
* still holding the mmap_sem.
+ *
+ * ... and hb->lock. --ANK
*/
+ spin_unlock(&hb->lock);
+
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr, fshared,
attempt);
if (ret)
- goto out_unlock;
- goto retry_locked;
+ goto out;
+ goto retry_unlocked;
}
- spin_unlock(&hb->lock);
if (fshared)
up_read(fshared);
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 338a9b489fbc..27478948b318 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -144,20 +144,21 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
struct timespec ts;
ktime_t t, *tp = NULL;
int val2 = 0;
+ int cmd = op & FUTEX_CMD_MASK;
- if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
+ if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
if (get_compat_timespec(&ts, utime))
return -EFAULT;
if (!timespec_valid(&ts))
return -EINVAL;
t = timespec_to_ktime(ts);
- if (op == FUTEX_WAIT)
+ if (cmd == FUTEX_WAIT)
t = ktime_add(ktime_get(), t);
tp = &t;
}
- if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
- || op == FUTEX_CMP_REQUEUE_PI)
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE
+ || cmd == FUTEX_CMP_REQUEUE_PI)
val2 = (int) (unsigned long) utime;
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
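
The point of cmd = op & FUTEX_CMD_MASK is that op may carry modifier bits (notably FUTEX_PRIVATE_FLAG) on top of the command, so comparing op directly misses private variants. A small demo, with the constants assumed to match the era's <linux/futex.h>:

#include <stdio.h>

#define FUTEX_WAIT          0
#define FUTEX_PRIVATE_FLAG  128
#define FUTEX_CMD_MASK      (~FUTEX_PRIVATE_FLAG)

int main(void)
{
	int op = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;

	/* Comparing op directly misses the private variant: */
	printf("op == FUTEX_WAIT:               %d\n", op == FUTEX_WAIT);
	/* Masking off the modifier bits first, as the fix does: */
	printf("(op & FUTEX_CMD_MASK) == WAIT:  %d\n",
	       (op & FUTEX_CMD_MASK) == FUTEX_WAIT);
	return 0;
}
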
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index b0d81aae472f..bd9e272d55e9 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -135,6 +135,39 @@ report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
}
}
+static inline int try_misrouted_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
+{
+ struct irqaction *action;
+
+ if (!irqfixup)
+ return 0;
+
+ /* We didn't actually handle the IRQ - was it misrouted? */
+ if (action_ret == IRQ_NONE)
+ return 1;
+
+ /*
+ * But for 'irqfixup == 2' we also do it for handled interrupts if
+ * they are marked as IRQF_IRQPOLL (or for irq zero, which is the
+ * traditional PC timer interrupt; a legacy case)
+ */
+ if (irqfixup < 2)
+ return 0;
+
+ if (!irq)
+ return 1;
+
+ /*
+ * Since we don't get the descriptor lock, "action" can
+ * change under us. We don't really care, but we don't
+ * want to follow a NULL pointer. So tell the compiler to
+ * just load it once by using a barrier.
+ */
+ action = desc->action;
+ barrier();
+ return action && (action->flags & IRQF_IRQPOLL);
+}
+
void note_interrupt(unsigned int irq, struct irq_desc *desc,
irqreturn_t action_ret)
{
@@ -144,15 +177,10 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
report_bad_irq(irq, desc, action_ret);
}
- if (unlikely(irqfixup)) {
- /* Don't punish working computers */
- if ((irqfixup == 2 && ((irq == 0) ||
- (desc->action->flags & IRQF_IRQPOLL))) ||
- action_ret == IRQ_NONE) {
- int ok = misrouted_irq(irq);
- if (action_ret == IRQ_NONE)
- desc->irqs_unhandled -= ok;
- }
+ if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
+ int ok = misrouted_irq(irq);
+ if (action_ret == IRQ_NONE)
+ desc->irqs_unhandled -= ok;
}
desc->irq_count++;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index f1bda23140b2..fed54418626c 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -257,7 +257,8 @@ const char *kallsyms_lookup(unsigned long addr,
pos = get_symbol_pos(addr, symbolsize, offset);
/* Grab name */
kallsyms_expand_symbol(get_symbol_offset(pos), namebuf);
- *modname = NULL;
+ if (modname)
+ *modname = NULL;
return namebuf;
}
diff --git a/kernel/kthread.c b/kernel/kthread.c
index df8a8e8f6ca4..bbd51b81a3e8 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -70,7 +70,7 @@ static int kthread(void *_create)
data = create->data;
/* OK, tell user we're spawned, wait for stop or wakeup */
- __set_current_state(TASK_INTERRUPTIBLE);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
complete(&create->started);
schedule();
@@ -162,7 +162,10 @@ EXPORT_SYMBOL(kthread_create);
*/
void kthread_bind(struct task_struct *k, unsigned int cpu)
{
- BUG_ON(k->state != TASK_INTERRUPTIBLE);
+ if (k->state != TASK_UNINTERRUPTIBLE) {
+ WARN_ON(1);
+ return;
+ }
/* Must have done schedule() in kthread() before we set_task_cpu */
wait_task_inactive(k);
set_task_cpu(k, cpu);
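
For context, the usual create/bind/wake sequence that relies on the new TASK_UNINTERRUPTIBLE handshake looks like this; a sketch, with my_thread_fn and start_pinned_thread as hypothetical names:

static int my_thread_fn(void *data)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

static void start_pinned_thread(void)
{
	struct task_struct *t = kthread_create(my_thread_fn, NULL, "mythread");

	if (!IS_ERR(t)) {
		/* Legal only before the first wakeup, while t still
		 * sleeps in TASK_UNINTERRUPTIBLE inside kthread(): */
		kthread_bind(t, 0);
		wake_up_process(t);
	}
}
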
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index b5f0543ed84d..f445b9cd60fb 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -416,7 +416,8 @@ static ssize_t disk_store(struct kset *kset, const char *buf, size_t n)
mutex_lock(&pm_mutex);
for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
- if (!strncmp(buf, hibernation_modes[i], len)) {
+ if (len == strlen(hibernation_modes[i])
+ && !strncmp(buf, hibernation_modes[i], len)) {
mode = i;
break;
}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 40d56a31245e..8812985f3029 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -97,25 +97,26 @@ static int suspend_prepare(suspend_state_t state)
}
}
- if (pm_ops->prepare) {
- if ((error = pm_ops->prepare(state)))
- goto Thaw;
- }
-
suspend_console();
error = device_suspend(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "Some devices failed to suspend\n");
- goto Resume_devices;
+ goto Resume_console;
}
+ if (pm_ops->prepare) {
+ if ((error = pm_ops->prepare(state)))
+ goto Resume_devices;
+ }
+
error = disable_nonboot_cpus();
if (!error)
return 0;
enable_nonboot_cpus();
- Resume_devices:
pm_finish(state);
+ Resume_devices:
device_resume();
+ Resume_console:
resume_console();
Thaw:
thaw_processes();
@@ -289,13 +290,13 @@ static ssize_t state_store(struct kset *kset, const char *buf, size_t n)
len = p ? p - buf : n;
/* First, check if we are requested to hibernate */
- if (!strncmp(buf, "disk", len)) {
+ if (len == 4 && !strncmp(buf, "disk", len)) {
error = hibernate();
return error ? error : n;
}
for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
- if (*s && !strncmp(buf, *s, len))
+ if (*s && len == strlen(*s) && !strncmp(buf, *s, len))
break;
}
if (state < PM_SUSPEND_MAX && *s)
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 088419387388..e0233d8422b9 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -31,16 +31,36 @@ static inline int freezeable(struct task_struct * p)
return 1;
}
+/*
+ * freezing is complete, mark current process as frozen
+ */
+static inline void frozen_process(void)
+{
+ if (!unlikely(current->flags & PF_NOFREEZE)) {
+ current->flags |= PF_FROZEN;
+ wmb();
+ }
+ clear_tsk_thread_flag(current, TIF_FREEZE);
+}
+
/* Refrigerator is the place where frozen processes are stored :-). */
void refrigerator(void)
{
/* Hmm, should we be allowed to suspend when there are realtime
processes around? */
long save;
+
+ task_lock(current);
+ if (freezing(current)) {
+ frozen_process();
+ task_unlock(current);
+ } else {
+ task_unlock(current);
+ return;
+ }
save = current->state;
pr_debug("%s entered refrigerator\n", current->comm);
- frozen_process(current);
spin_lock_irq(&current->sighand->siglock);
recalc_sigpending(); /* We sent fake signal, clean it up */
spin_unlock_irq(&current->sighand->siglock);
@@ -81,7 +101,7 @@ static void cancel_freezing(struct task_struct *p)
pr_debug(" clean up: %s\n", p->comm);
do_not_freeze(p);
spin_lock_irqsave(&p->sighand->siglock, flags);
- recalc_sigpending_tsk(p);
+ recalc_sigpending_and_wake(p);
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
}
@@ -112,22 +132,12 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space)
cancel_freezing(p);
continue;
}
- if (is_user_space(p)) {
- if (!freeze_user_space)
- continue;
-
- /* Freeze the task unless there is a vfork
- * completion pending
- */
- if (!p->vfork_done)
- freeze_process(p);
- } else {
- if (freeze_user_space)
- continue;
-
- freeze_process(p);
- }
- todo++;
+ if (freeze_user_space && !is_user_space(p))
+ continue;
+
+ freeze_process(p);
+ if (!freezer_should_skip(p))
+ todo++;
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
yield(); /* Yield is okay here */
@@ -149,13 +159,16 @@ static unsigned int try_to_freeze_tasks(int freeze_user_space)
TIMEOUT / HZ, todo);
read_lock(&tasklist_lock);
do_each_thread(g, p) {
- if (is_user_space(p) == !freeze_user_space)
+ if (freeze_user_space && !is_user_space(p))
continue;
- if (freezeable(p) && !frozen(p))
+ task_lock(p);
+ if (freezeable(p) && !frozen(p) &&
+ !freezer_should_skip(p))
printk(KERN_ERR " %s\n", p->comm);
cancel_freezing(p);
+ task_unlock(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
}
@@ -200,9 +213,7 @@ static void thaw_tasks(int thaw_user_space)
if (is_user_space(p) == !thaw_user_space)
continue;
- if (!thaw_process(p))
- printk(KERN_WARNING " Strange, %s not stopped\n",
- p->comm );
+ thaw_process(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
}
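
try_to_freeze_tasks() now pairs with the writer side above through two flag tests. In this era's freezer.h they are simple bit checks; a sketch, with the exact definitions assumed:

static inline int frozen(struct task_struct *p)
{
	return p->flags & PF_FROZEN;		/* set in frozen_process() */
}

static inline int freezer_should_skip(struct task_struct *p)
{
	return p->flags & PF_FREEZER_SKIP;	/* set by freezer_do_not_count() */
}
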
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index b8b235cc19d1..8b1a1b837145 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -584,7 +584,7 @@ int swsusp_check(void)
resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
if (!IS_ERR(resume_bdev)) {
set_blocksize(resume_bdev, PAGE_SIZE);
- memset(swsusp_header, 0, sizeof(PAGE_SIZE));
+ memset(swsusp_header, 0, PAGE_SIZE);
error = bio_read_page(swsusp_resume_block,
swsusp_header, NULL);
if (error)
diff --git a/kernel/profile.c b/kernel/profile.c
index cc91b9bf759d..5b20fe977bed 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -26,6 +26,7 @@
#include <asm/sections.h>
#include <asm/semaphore.h>
#include <asm/irq_regs.h>
+#include <asm/ptrace.h>
struct profile_hit {
u32 pc, hits;
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 12879f6c1ec3..a6fbb4130521 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -189,6 +189,19 @@ int rt_mutex_adjust_prio_chain(struct task_struct *task,
if (!waiter || !waiter->task)
goto out_unlock_pi;
+ /*
+ * Check the orig_waiter state. After we dropped the locks,
+ * the previous owner of the lock might have released the lock
+ * and made us the pending owner:
+ */
+ if (orig_waiter && !orig_waiter->task)
+ goto out_unlock_pi;
+
+ /*
+ * Drop out, when the task has no waiters. Note,
+ * top_waiter can be NULL, when we are in the deboosting
+ * mode!
+ */
if (top_waiter && (!task_has_pi_waiters(task) ||
top_waiter != task_top_pi_waiter(task)))
goto out_unlock_pi;
@@ -636,9 +649,16 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
* all over without going into schedule to try
* to get the lock now:
*/
- if (unlikely(!waiter.task))
+ if (unlikely(!waiter.task)) {
+ /*
+ * Reset the return value. We might
+ * have returned with -EDEADLK and the
+ * owner released the lock while we
+ * were walking the pi chain.
+ */
+ ret = 0;
continue;
-
+ }
if (unlikely(ret))
break;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 799d23b4e35d..13cdab3b4c48 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4775,9 +4775,7 @@ int __sched cond_resched_softirq(void)
BUG_ON(!in_softirq());
if (need_resched() && system_state == SYSTEM_RUNNING) {
- raw_local_irq_disable();
- _local_bh_enable();
- raw_local_irq_enable();
+ local_bh_enable();
__cond_resched();
local_bh_disable();
return 1;
diff --git a/kernel/signal.c b/kernel/signal.c
index 364fc95bf97c..fe590e00db8d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -96,20 +96,38 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
#define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
-fastcall void recalc_sigpending_tsk(struct task_struct *t)
+static int recalc_sigpending_tsk(struct task_struct *t)
{
if (t->signal->group_stop_count > 0 ||
(freezing(t)) ||
PENDING(&t->pending, &t->blocked) ||
- PENDING(&t->signal->shared_pending, &t->blocked))
+ PENDING(&t->signal->shared_pending, &t->blocked)) {
set_tsk_thread_flag(t, TIF_SIGPENDING);
- else
- clear_tsk_thread_flag(t, TIF_SIGPENDING);
+ return 1;
+ }
+ /*
+ * We must never clear the flag in another thread, or in current
+ * when it's possible the current syscall is returning -ERESTART*.
+ * So we don't clear it here; only callers that know it is safe
+ * clear it themselves.
+ */
+ return 0;
+}
+
+/*
+ * After recalculating TIF_SIGPENDING, we need to make sure the task wakes up.
+ * This is superfluous when called on current; the wakeup is a harmless no-op.
+ */
+void recalc_sigpending_and_wake(struct task_struct *t)
+{
+ if (recalc_sigpending_tsk(t))
+ signal_wake_up(t, 0);
}
void recalc_sigpending(void)
{
- recalc_sigpending_tsk(current);
+ if (!recalc_sigpending_tsk(current))
+ clear_thread_flag(TIF_SIGPENDING);
+
}
/* Given the mask, find the first available signal that should be serviced. */
@@ -373,7 +391,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
}
}
}
- recalc_sigpending_tsk(tsk);
+ if (likely(tsk == current))
+ recalc_sigpending();
if (signr && unlikely(sig_kernel_stop(signr))) {
/*
* Set a marker that we have dequeued a stop signal. Our
@@ -744,7 +763,7 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
action->sa.sa_handler = SIG_DFL;
if (blocked) {
sigdelset(&t->blocked, sig);
- recalc_sigpending_tsk(t);
+ recalc_sigpending_and_wake(t);
}
}
ret = specific_send_sig_info(sig, info, t);
@@ -1568,8 +1587,9 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
/*
* Queued signals ignored us while we were stopped for tracing.
* So check for any that we should take before resuming user mode.
+ * This sets TIF_SIGPENDING, but never clears it.
*/
- recalc_sigpending();
+ recalc_sigpending_tsk(current);
}
void ptrace_notify(int exit_code)
@@ -2273,7 +2293,7 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
rm_from_queue_full(&mask, &t->signal->shared_pending);
do {
rm_from_queue_full(&mask, &t->pending);
- recalc_sigpending_tsk(t);
+ recalc_sigpending_and_wake(t);
t = next_thread(t);
} while (t != current);
}
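
Every caller converted to recalc_sigpending_and_wake() in this patch already holds the sighand lock, which is what makes the set-then-wake sequence safe. The canonical calling pattern (fragment):

	spin_lock_irqsave(&t->sighand->siglock, flags);
	recalc_sigpending_and_wake(t);	/* may set TIF_SIGPENDING and kick t */
	spin_unlock_irqrestore(&t->sighand->siglock, flags);
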
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4073353abd4f..30ee462ee79f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -227,7 +227,7 @@ static ctl_table kern_table[] = {
.ctl_name = KERN_CORE_PATTERN,
.procname = "core_pattern",
.data = core_pattern,
- .maxlen = 128,
+ .maxlen = CORENAME_MAX_SIZE,
.mode = 0644,
.proc_handler = &proc_dostring,
.strategy = &sysctl_string,
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3db5c3c460d7..51b6a6a6158c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -74,7 +74,7 @@ static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DEFINE_SPINLOCK(watchdog_lock);
static cycle_t watchdog_last;
-static int watchdog_resumed;
+static unsigned long watchdog_resumed;
/*
* Interval: 0.5sec Threshold: 0.0625s
@@ -104,9 +104,7 @@ static void clocksource_watchdog(unsigned long data)
spin_lock(&watchdog_lock);
- resumed = watchdog_resumed;
- if (unlikely(resumed))
- watchdog_resumed = 0;
+ resumed = test_and_clear_bit(0, &watchdog_resumed);
wdnow = watchdog->read();
wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask);
@@ -151,9 +149,7 @@ static void clocksource_watchdog(unsigned long data)
}
static void clocksource_resume_watchdog(void)
{
- spin_lock(&watchdog_lock);
- watchdog_resumed = 1;
- spin_unlock(&watchdog_lock);
+ set_bit(0, &watchdog_resumed);
}
static void clocksource_check_watchdog(struct clocksource *cs)
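
set_bit()/test_and_clear_bit() turn the resume flag into a lock-free one-shot, so clocksource_resume_watchdog() no longer needs the spinlock (and can be called from contexts where taking it is unsafe). The same shape in portable C11, with atomics standing in for the kernel bitops:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool resumed;

void on_resume(void)		/* may run with interrupts disabled */
{
	atomic_store(&resumed, true);
}

bool check_resumed(void)	/* periodic watchdog callback */
{
	/* atomically read and clear, like test_and_clear_bit(): */
	return atomic_exchange(&resumed, false);
}
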
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index cb25649c6f50..87aa5ff931e0 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -11,6 +11,8 @@
#include <linux/mm.h>
#include <linux/time.h>
#include <linux/timex.h>
+#include <linux/jiffies.h>
+#include <linux/hrtimer.h>
#include <asm/div64.h>
#include <asm/timex.h>
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index eadfce2fff74..8001d37071f5 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -243,11 +243,18 @@ void tick_broadcast_on_off(unsigned long reason, int *oncpu)
{
int cpu = get_cpu();
- if (cpu == *oncpu)
- tick_do_broadcast_on_off(&reason);
- else
- smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
- &reason, 1, 1);
+ if (!cpu_isset(*oncpu, cpu_online_map)) {
+ printk(KERN_ERR "tick-braodcast: ignoring broadcast for "
+ "offline CPU #%d\n", *oncpu);
+ } else {
+
+ if (cpu == *oncpu)
+ tick_do_broadcast_on_off(&reason);
+ else
+ smp_call_function_single(*oncpu,
+ tick_do_broadcast_on_off,
+ &reason, 1, 1);
+ }
put_cpu();
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3483e6cb9549..52db9e3c526e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -167,9 +167,15 @@ void tick_nohz_stop_sched_tick(void)
goto end;
cpu = smp_processor_id();
- if (unlikely(local_softirq_pending()))
- printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
- local_softirq_pending());
+ if (unlikely(local_softirq_pending())) {
+ static int ratelimit;
+
+ if (ratelimit < 10) {
+ printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
+ local_softirq_pending());
+ ratelimit++;
+ }
+ }
now = ktime_get();
/*
@@ -241,6 +247,21 @@ void tick_nohz_stop_sched_tick(void)
if (cpu == tick_do_timer_cpu)
tick_do_timer_cpu = -1;
+ ts->idle_sleeps++;
+
+ /*
+ * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that
+ * there is no timer pending or at least extremely far
+ * into the future (12 days for HZ=1000). In this case
+ * we simply stop the tick timer:
+ */
+ if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
+ ts->idle_expires.tv64 = KTIME_MAX;
+ if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
+ hrtimer_cancel(&ts->sched_timer);
+ goto out;
+ }
+
/*
* calculate the expiry time for the next timer wheel
* timer
@@ -248,7 +269,6 @@ void tick_nohz_stop_sched_tick(void)
expires = ktime_add_ns(last_update, tick_period.tv64 *
delta_jiffies);
ts->idle_expires = expires;
- ts->idle_sleeps++;
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start(&ts->sched_timer, expires,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f9217bf644f6..3d1042f82a68 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -273,6 +273,8 @@ static int timekeeping_resume(struct sys_device *dev)
unsigned long flags;
unsigned long now = read_persistent_clock();
+ clocksource_resume();
+
write_seqlock_irqsave(&xtime_lock, flags);
if (now && (now > timekeeping_suspend_time)) {
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 868f1bceb07f..321693724ad7 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -117,21 +117,6 @@ static struct entry entries[MAX_ENTRIES];
static atomic_t overflow_count;
-static void reset_entries(void)
-{
- nr_entries = 0;
- memset(entries, 0, sizeof(entries));
- atomic_set(&overflow_count, 0);
-}
-
-static struct entry *alloc_entry(void)
-{
- if (nr_entries >= MAX_ENTRIES)
- return NULL;
-
- return entries + nr_entries++;
-}
-
/*
* The entries are in a hash-table, for fast lookup:
*/
@@ -149,6 +134,22 @@ static struct entry *alloc_entry(void)
static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
+static void reset_entries(void)
+{
+ nr_entries = 0;
+ memset(entries, 0, sizeof(entries));
+ memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
+ atomic_set(&overflow_count, 0);
+}
+
+static struct entry *alloc_entry(void)
+{
+ if (nr_entries >= MAX_ENTRIES)
+ return NULL;
+
+ return entries + nr_entries++;
+}
+
static int match_entries(struct entry *entry1, struct entry *entry2)
{
return entry1->timer == entry2->timer &&
@@ -202,12 +203,15 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm)
if (curr) {
*curr = *entry;
curr->count = 0;
+ curr->next = NULL;
memcpy(curr->comm, comm, TASK_COMM_LEN);
+
+ smp_mb(); /* Ensure that curr is initialized before insert */
+
if (prev)
prev->next = curr;
else
*head = curr;
- curr->next = NULL;
}
out_unlock:
spin_unlock(&table_lock);
@@ -232,10 +236,15 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
/*
* It doesn't matter which lock we take:
*/
- spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id());
+ spinlock_t *lock;
struct entry *entry, input;
unsigned long flags;
+ if (likely(!active))
+ return;
+
+ lock = &per_cpu(lookup_lock, raw_smp_processor_id());
+
input.timer = timer;
input.start_func = startf;
input.expire_func = timerf;
@@ -360,6 +369,7 @@ static ssize_t tstats_write(struct file *file, const char __user *buf,
if (!active) {
reset_entries();
time_start = ktime_get();
+ smp_mb();
active = 1;
}
break;
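
Both smp_mb() additions enforce the same publish order: fully initialize the entry, then make it reachable. The insert side condensed (names as in the hunk above; a fragment):

	curr->next = NULL;
	memcpy(curr->comm, comm, TASK_COMM_LEN);
	smp_mb();		/* initialization must be visible ... */
	prev->next = curr;	/* ... before the entry becomes reachable
				 * to lockless readers walking the chain */
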
diff --git a/kernel/timer.c b/kernel/timer.c
index a6c580ac084b..1a69705c2fb9 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -666,7 +666,7 @@ static inline void __run_timers(tvec_base_t *base)
static unsigned long __next_timer_interrupt(tvec_base_t *base)
{
unsigned long timer_jiffies = base->timer_jiffies;
- unsigned long expires = timer_jiffies + (LONG_MAX >> 1);
+ unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
int index, slot, array, found = 0;
struct timer_list *nte;
tvec_t *varray[4];
@@ -752,6 +752,14 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
tsdelta = ktime_to_timespec(hr_delta);
delta = timespec_to_jiffies(&tsdelta);
+
+ /*
+ * Limit the delta to the max value, which is checked in
+ * tick_nohz_stop_sched_tick():
+ */
+ if (delta > NEXT_TIMER_MAX_DELTA)
+ delta = NEXT_TIMER_MAX_DELTA;
+
/*
* Take rounding errors in to account and make sure, that it
* expires in the next tick. Otherwise we go into an endless
@@ -1499,8 +1507,6 @@ unregister_time_interpolator(struct time_interpolator *ti)
prev = &curr->next;
}
- clocksource_resume();
-
write_seqlock_irqsave(&xtime_lock, flags);
if (ti == time_interpolator) {
/* we lost the best time-interpolator: */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index fb56fedd5c02..3bebf73be976 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -47,7 +47,6 @@ struct cpu_workqueue_struct {
struct workqueue_struct *wq;
struct task_struct *thread;
- int should_stop;
int run_depth; /* Detect run_workqueue() recursion depth */
} ____cacheline_aligned;
@@ -71,7 +70,13 @@ static LIST_HEAD(workqueues);
static int singlethread_cpu __read_mostly;
static cpumask_t cpu_singlethread_map __read_mostly;
-/* optimization, we could use cpu_possible_map */
+/*
+ * _cpu_down() first removes CPU from cpu_online_map, then CPU_DEAD
+ * flushes cwq->worklist. This means that flush_workqueue/wait_on_work
+ * which comes in between can't use for_each_online_cpu(). We could
+ * use cpu_possible_map, the cpumask below is more a documentation
+ * than optimization.
+ */
static cpumask_t cpu_populated_map __read_mostly;
/* If it's single threaded, it isn't in the list of workqueues. */
@@ -272,24 +277,6 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
spin_unlock_irq(&cwq->lock);
}
-/*
- * NOTE: the caller must not touch *cwq if this func returns true
- */
-static int cwq_should_stop(struct cpu_workqueue_struct *cwq)
-{
- int should_stop = cwq->should_stop;
-
- if (unlikely(should_stop)) {
- spin_lock_irq(&cwq->lock);
- should_stop = cwq->should_stop && list_empty(&cwq->worklist);
- if (should_stop)
- cwq->thread = NULL;
- spin_unlock_irq(&cwq->lock);
- }
-
- return should_stop;
-}
-
static int worker_thread(void *__cwq)
{
struct cpu_workqueue_struct *cwq = __cwq;
@@ -302,14 +289,15 @@ static int worker_thread(void *__cwq)
for (;;) {
prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
- if (!freezing(current) && !cwq->should_stop
- && list_empty(&cwq->worklist))
+ if (!freezing(current) &&
+ !kthread_should_stop() &&
+ list_empty(&cwq->worklist))
schedule();
finish_wait(&cwq->more_work, &wait);
try_to_freeze();
- if (cwq_should_stop(cwq))
+ if (kthread_should_stop())
break;
run_workqueue(cwq);
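
With cwq->should_stop gone, the worker converges on the stock kthread loop. A stripped-down sketch; have_work() and do_pending_work() are hypothetical stand-ins:

static DECLARE_WAIT_QUEUE_HEAD(more_work);

static int worker(void *unused)
{
	DEFINE_WAIT(wait);

	while (!kthread_should_stop()) {
		prepare_to_wait(&more_work, &wait, TASK_INTERRUPTIBLE);
		if (!have_work())		/* hypothetical predicate */
			schedule();
		finish_wait(&more_work, &wait);

		do_pending_work();		/* hypothetical helper */
	}
	return 0;	/* this value is what kthread_stop() returns */
}
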
@@ -340,18 +328,21 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
insert_work(cwq, &barr->work, tail);
}
-static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
+static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
{
+ int active;
+
if (cwq->thread == current) {
/*
* Probably keventd trying to flush its own queue. So simply run
* it by hand rather than deadlocking.
*/
run_workqueue(cwq);
+ active = 1;
} else {
struct wq_barrier barr;
- int active = 0;
+ active = 0;
spin_lock_irq(&cwq->lock);
if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
insert_wq_barrier(cwq, &barr, 1);
@@ -362,6 +353,8 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
if (active)
wait_for_completion(&barr.done);
}
+
+ return active;
}
/**
@@ -674,7 +667,6 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
return PTR_ERR(p);
cwq->thread = p;
- cwq->should_stop = 0;
return 0;
}
@@ -740,29 +732,27 @@ EXPORT_SYMBOL_GPL(__create_workqueue);
static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
- struct wq_barrier barr;
- int alive = 0;
-
- spin_lock_irq(&cwq->lock);
- if (cwq->thread != NULL) {
- insert_wq_barrier(cwq, &barr, 1);
- cwq->should_stop = 1;
- alive = 1;
- }
- spin_unlock_irq(&cwq->lock);
+ /*
+ * Our caller is either destroy_workqueue() or CPU_DEAD,
+ * workqueue_mutex protects cwq->thread
+ */
+ if (cwq->thread == NULL)
+ return;
- if (alive) {
- wait_for_completion(&barr.done);
+ /*
+ * If the caller is CPU_DEAD the single flush_cpu_workqueue()
+ * is not enough, a concurrent flush_workqueue() can insert a
+ * barrier after us.
+ * When ->worklist becomes empty it is safe to exit because no
+ * more work_structs can be queued on this cwq: flush_workqueue
+ * checks list_empty(), and a "normal" queue_work() can't use
+ * a dead CPU.
+ */
+ while (flush_cpu_workqueue(cwq))
+ ;
- while (unlikely(cwq->thread != NULL))
- cpu_relax();
- /*
- * Wait until cwq->thread unlocks cwq->lock,
- * it won't touch *cwq after that.
- */
- smp_rmb();
- spin_unlock_wait(&cwq->lock);
- }
+ kthread_stop(cwq->thread);
+ cwq->thread = NULL;
}
/**