From fca839c00a12d682cb59b3b620d109a1d850b262 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 10:58:57 -0500 Subject: workqueue: warn if memory reclaim tries to flush !WQ_MEM_RECLAIM workqueue Task or work item involved in memory reclaim trying to flush a non-WQ_MEM_RECLAIM workqueue or one of its work items can lead to deadlock. Trigger WARN_ONCE() if such conditions are detected. Signed-off-by: Tejun Heo Cc: Peter Zijlstra --- kernel/workqueue.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c579dbab2e36..c7769c507bf5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2316,6 +2316,37 @@ repeat: goto repeat; } +/** + * check_flush_dependency - check for flush dependency sanity + * @target_wq: workqueue being flushed + * @target_work: work item being flushed (NULL for workqueue flushes) + * + * %current is trying to flush the whole @target_wq or @target_work on it. + * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not + * reclaiming memory or running on a workqueue which doesn't have + * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to + * a deadlock. + */ +static void check_flush_dependency(struct workqueue_struct *target_wq, + struct work_struct *target_work) +{ + work_func_t target_func = target_work ? target_work->func : NULL; + struct worker *worker; + + if (target_wq->flags & WQ_MEM_RECLAIM) + return; + + worker = current_wq_worker(); + + WARN_ONCE(current->flags & PF_MEMALLOC, + "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf", + current->pid, current->comm, target_wq->name, target_func); + WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM), + "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf", + worker->current_pwq->wq->name, worker->current_func, + target_wq->name, target_func); +} + struct wq_barrier { struct work_struct work; struct completion done; @@ -2525,6 +2556,8 @@ void flush_workqueue(struct workqueue_struct *wq) list_add_tail(&this_flusher.list, &wq->flusher_overflow); } + check_flush_dependency(wq, NULL); + mutex_unlock(&wq->mutex); wait_for_completion(&this_flusher.done); @@ -2697,6 +2730,8 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) pwq = worker->current_pwq; } + check_flush_dependency(pwq->wq, work); + insert_wq_barrier(pwq, barr, work, worker); spin_unlock_irq(&pool->lock); -- cgit v1.2.3 From 82607adcf9cdf40fb7b5331269780c8f70ec6e35 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Dec 2015 11:28:04 -0500 Subject: workqueue: implement lockup detector Workqueue stalls can happen from a variety of usage bugs such as missing WQ_MEM_RECLAIM flag or concurrency managed work item indefinitely staying RUNNING. These stalls can be extremely difficult to hunt down because the usual warning mechanisms can't detect workqueue stalls and the internal state is pretty opaque. To alleviate the situation, this patch implements workqueue lockup detector. It periodically monitors all worker_pools periodically and, if any pool failed to make forward progress longer than the threshold duration, triggers warning and dumps workqueue state as follows. BUG: workqueue lockup - pool cpus=0 node=0 flags=0x0 nice=0 stuck for 31s! Showing busy workqueues and worker pools: workqueue events: flags=0x0 pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=17/256 pending: monkey_wrench_fn, e1000_watchdog, cache_reap, vmstat_shepherd, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, release_one_tty, cgroup_release_agent workqueue events_power_efficient: flags=0x80 pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=2/256 pending: check_lifetime, neigh_periodic_work workqueue cgroup_pidlist_destroy: flags=0x0 pwq 0: cpus=0 node=0 flags=0x0 nice=0 active=1/1 pending: cgroup_pidlist_destroy_work_fn ... The detection mechanism is controller through kernel parameter workqueue.watchdog_thresh and can be updated at runtime through the sysfs module parameter file. v2: Decoupled from softlockup control knobs. Signed-off-by: Tejun Heo Acked-by: Don Zickus Cc: Ulrich Obergfell Cc: Michal Hocko Cc: Chris Mason Cc: Andrew Morton --- kernel/workqueue.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 171 insertions(+), 3 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c7769c507bf5..1ecb588aae07 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -148,6 +148,8 @@ struct worker_pool { int id; /* I: pool ID */ unsigned int flags; /* X: flags */ + unsigned long watchdog_ts; /* L: watchdog timestamp */ + struct list_head worklist; /* L: list of pending works */ int nr_workers; /* L: total number of workers */ @@ -1083,6 +1085,8 @@ static void pwq_activate_delayed_work(struct work_struct *work) struct pool_workqueue *pwq = get_work_pwq(work); trace_workqueue_activate_work(work); + if (list_empty(&pwq->pool->worklist)) + pwq->pool->watchdog_ts = jiffies; move_linked_works(work, &pwq->pool->worklist, NULL); __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); pwq->nr_active++; @@ -1385,6 +1389,8 @@ retry: trace_workqueue_activate_work(work); pwq->nr_active++; worklist = &pwq->pool->worklist; + if (list_empty(worklist)) + pwq->pool->watchdog_ts = jiffies; } else { work_flags |= WORK_STRUCT_DELAYED; worklist = &pwq->delayed_works; @@ -2157,6 +2163,8 @@ recheck: list_first_entry(&pool->worklist, struct work_struct, entry); + pool->watchdog_ts = jiffies; + if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) { /* optimization path, not strictly necessary */ process_one_work(worker, work); @@ -2240,6 +2248,7 @@ repeat: struct pool_workqueue, mayday_node); struct worker_pool *pool = pwq->pool; struct work_struct *work, *n; + bool first = true; __set_current_state(TASK_RUNNING); list_del_init(&pwq->mayday_node); @@ -2256,9 +2265,14 @@ repeat: * process'em. */ WARN_ON_ONCE(!list_empty(scheduled)); - list_for_each_entry_safe(work, n, &pool->worklist, entry) - if (get_work_pwq(work) == pwq) + list_for_each_entry_safe(work, n, &pool->worklist, entry) { + if (get_work_pwq(work) == pwq) { + if (first) + pool->watchdog_ts = jiffies; move_linked_works(work, scheduled, &n); + } + first = false; + } if (!list_empty(scheduled)) { process_scheduled_works(rescuer); @@ -3104,6 +3118,7 @@ static int init_worker_pool(struct worker_pool *pool) pool->cpu = -1; pool->node = NUMA_NO_NODE; pool->flags |= POOL_DISASSOCIATED; + pool->watchdog_ts = jiffies; INIT_LIST_HEAD(&pool->worklist); INIT_LIST_HEAD(&pool->idle_list); hash_init(pool->busy_hash); @@ -4343,7 +4358,9 @@ void show_workqueue_state(void) pr_info("pool %d:", pool->id); pr_cont_pool_info(pool); - pr_cont(" workers=%d", pool->nr_workers); + pr_cont(" hung=%us workers=%d", + jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000, + pool->nr_workers); if (pool->manager) pr_cont(" manager: %d", task_pid_nr(pool->manager->task)); @@ -5202,6 +5219,154 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq) static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { } #endif /* CONFIG_SYSFS */ +/* + * Workqueue watchdog. + * + * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal + * flush dependency, a concurrency managed work item which stays RUNNING + * indefinitely. Workqueue stalls can be very difficult to debug as the + * usual warning mechanisms don't trigger and internal workqueue state is + * largely opaque. + * + * Workqueue watchdog monitors all worker pools periodically and dumps + * state if some pools failed to make forward progress for a while where + * forward progress is defined as the first item on ->worklist changing. + * + * This mechanism is controlled through the kernel parameter + * "workqueue.watchdog_thresh" which can be updated at runtime through the + * corresponding sysfs parameter file. + */ +#ifdef CONFIG_WQ_WATCHDOG + +static void wq_watchdog_timer_fn(unsigned long data); + +static unsigned long wq_watchdog_thresh = 30; +static struct timer_list wq_watchdog_timer = + TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0); + +static unsigned long wq_watchdog_touched = INITIAL_JIFFIES; +static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES; + +static void wq_watchdog_reset_touched(void) +{ + int cpu; + + wq_watchdog_touched = jiffies; + for_each_possible_cpu(cpu) + per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies; +} + +static void wq_watchdog_timer_fn(unsigned long data) +{ + unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ; + bool lockup_detected = false; + struct worker_pool *pool; + int pi; + + if (!thresh) + return; + + rcu_read_lock(); + + for_each_pool(pool, pi) { + unsigned long pool_ts, touched, ts; + + if (list_empty(&pool->worklist)) + continue; + + /* get the latest of pool and touched timestamps */ + pool_ts = READ_ONCE(pool->watchdog_ts); + touched = READ_ONCE(wq_watchdog_touched); + + if (time_after(pool_ts, touched)) + ts = pool_ts; + else + ts = touched; + + if (pool->cpu >= 0) { + unsigned long cpu_touched = + READ_ONCE(per_cpu(wq_watchdog_touched_cpu, + pool->cpu)); + if (time_after(cpu_touched, ts)) + ts = cpu_touched; + } + + /* did we stall? */ + if (time_after(jiffies, ts + thresh)) { + lockup_detected = true; + pr_emerg("BUG: workqueue lockup - pool"); + pr_cont_pool_info(pool); + pr_cont(" stuck for %us!\n", + jiffies_to_msecs(jiffies - pool_ts) / 1000); + } + } + + rcu_read_unlock(); + + if (lockup_detected) + show_workqueue_state(); + + wq_watchdog_reset_touched(); + mod_timer(&wq_watchdog_timer, jiffies + thresh); +} + +void wq_watchdog_touch(int cpu) +{ + if (cpu >= 0) + per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies; + else + wq_watchdog_touched = jiffies; +} + +static void wq_watchdog_set_thresh(unsigned long thresh) +{ + wq_watchdog_thresh = 0; + del_timer_sync(&wq_watchdog_timer); + + if (thresh) { + wq_watchdog_thresh = thresh; + wq_watchdog_reset_touched(); + mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ); + } +} + +static int wq_watchdog_param_set_thresh(const char *val, + const struct kernel_param *kp) +{ + unsigned long thresh; + int ret; + + ret = kstrtoul(val, 0, &thresh); + if (ret) + return ret; + + if (system_wq) + wq_watchdog_set_thresh(thresh); + else + wq_watchdog_thresh = thresh; + + return 0; +} + +static const struct kernel_param_ops wq_watchdog_thresh_ops = { + .set = wq_watchdog_param_set_thresh, + .get = param_get_ulong, +}; + +module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh, + 0644); + +static void wq_watchdog_init(void) +{ + wq_watchdog_set_thresh(wq_watchdog_thresh); +} + +#else /* CONFIG_WQ_WATCHDOG */ + +static inline void wq_watchdog_init(void) { } + +#endif /* CONFIG_WQ_WATCHDOG */ + static void __init wq_numa_init(void) { cpumask_var_t *tbl; @@ -5325,6 +5490,9 @@ static int __init init_workqueues(void) !system_unbound_wq || !system_freezable_wq || !system_power_efficient_wq || !system_freezable_power_efficient_wq); + + wq_watchdog_init(); + return 0; } early_initcall(init_workqueues); -- cgit v1.2.3 From 6201171e3b2c02992e62448636631a0dfe4e9d20 Mon Sep 17 00:00:00 2001 From: wanghaibin Date: Thu, 7 Jan 2016 20:38:59 +0800 Subject: workqueue: simplify the apply_workqueue_attrs_locked() If the apply_wqattrs_prepare() returns NULL, it has already cleaned up the related resources, so it can return directly and avoid calling the clean up function again. This doesn't introduce any functional changes. Signed-off-by: wanghaibin Signed-off-by: Tejun Heo --- kernel/workqueue.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1ecb588aae07..61a0264e28f9 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3651,7 +3651,6 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq, const struct workqueue_attrs *attrs) { struct apply_wqattrs_ctx *ctx; - int ret = -ENOMEM; /* only unbound workqueues can change attributes */ if (WARN_ON(!(wq->flags & WQ_UNBOUND))) @@ -3662,16 +3661,14 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq, return -EINVAL; ctx = apply_wqattrs_prepare(wq, attrs); + if (!ctx) + return -ENOMEM; /* the ctx has been prepared successfully, let's commit it */ - if (ctx) { - apply_wqattrs_commit(ctx); - ret = 0; - } - + apply_wqattrs_commit(ctx); apply_wqattrs_cleanup(ctx); - return ret; + return 0; } /** -- cgit v1.2.3