From e5cba24e3f018d4beb6acd101a82483c98f91ce7 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake
Date: Fri, 26 Nov 2010 12:06:44 +0100
Subject: workqueue: check the allocation of system_unbound_wq

I found a trivial bug in the initialization of workqueues.  The
current init_workqueues() doesn't check the result of the allocation
of system_unbound_wq; it should be checked like the other queues.

Signed-off-by: Hitoshi Mitake
Cc: Arjan van de Ven
Cc: David Howells
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 90db1bd1a978..ca017ce8bc6b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3692,7 +3692,8 @@ static int __init init_workqueues(void)
         system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
         system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
                                             WQ_UNBOUND_MAX_ACTIVE);
-        BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq);
+        BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq ||
+               !system_unbound_wq);
         return 0;
 }
 early_initcall(init_workqueues);
--
cgit v1.2.3
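Context for the check above: alloc_workqueue() returns NULL when allocation fails, which is why init_workqueues() now BUG()s out if any of the system workqueues, including system_unbound_wq, came back NULL.  The sketch below shows the same check applied in an ordinary driver, where failure can be handled gracefully instead of with BUG_ON(); the workqueue name and the function are invented for illustration and are not part of the patch.

        #include <linux/errno.h>
        #include <linux/init.h>
        #include <linux/workqueue.h>

        /* Illustrative driver-private workqueue; not from the patch above. */
        static struct workqueue_struct *example_wq;

        static int __init example_init(void)
        {
                /* alloc_workqueue() returns NULL on failure, so check it,
                 * just as init_workqueues() now checks system_unbound_wq. */
                example_wq = alloc_workqueue("example_unbound", WQ_UNBOUND, 0);
                if (!example_wq)
                        return -ENOMEM;
                return 0;
        }

In init_workqueues() there is no graceful fallback available that early in boot, so BUG_ON() is the appropriate reaction there.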
From 2d64672ed38721b7a3815009d79bfb90a1f34a17 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 3 Dec 2010 23:12:33 -0500
Subject: workqueue: It is likely that WORKER_NOT_RUNNING is true

Running the annotate branch profiler on three boxes, including my main
box that runs firefox, evolution, xchat, and is part of the distcc
farm, showed this with the likelys in the workqueue code:

 correct incorrect  %   Function             File          Line
 ------- ---------  -   --------             ----          ----
      96    996253  99  wq_worker_sleeping   workqueue.c    703
      96    996247  99  wq_worker_waking_up  workqueue.c    677

The likely()s in this case were assuming that WORKER_NOT_RUNNING will
most likely be false.  But this is not the case.  The reason is (and
shown by adding trace_printks and testing it) that most of the time
WORKER_PREP is set.

In worker_thread() we have:

        worker_clr_flags(worker, WORKER_PREP);

        [ do work stuff ]

        worker_set_flags(worker, WORKER_PREP, false);

(that 'false' means not to wake up an idle worker)

wq_worker_sleeping() is called from schedule() when a worker thread is
putting itself to sleep, which happens most of the time outside of that
[ do work stuff ].  wq_worker_waking_up() is called by the wakeup
worker code, which is also called outside that [ do work stuff ].

Thus, the likely and unlikely used by those two functions are actually
backwards.  Remove the annotation and let gcc figure it out.

Acked-by: Tejun Heo
Signed-off-by: Steven Rostedt
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ca017ce8bc6b..e785b0f2aea5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -661,7 +661,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
 {
         struct worker *worker = kthread_data(task);

-        if (likely(!(worker->flags & WORKER_NOT_RUNNING)))
+        if (!(worker->flags & WORKER_NOT_RUNNING))
                 atomic_inc(get_gcwq_nr_running(cpu));
 }

@@ -687,7 +687,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
         struct global_cwq *gcwq = get_gcwq(cpu);
         atomic_t *nr_running = get_gcwq_nr_running(cpu);

-        if (unlikely(worker->flags & WORKER_NOT_RUNNING))
+        if (worker->flags & WORKER_NOT_RUNNING)
                 return NULL;

         /* this can only happen on the local cpu */
--
cgit v1.2.3
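Background for the change above: likely() and unlikely() are wrappers around gcc's __builtin_expect(), and the annotate branch profiler counts how often the prediction embedded in each annotation matches what actually happens at runtime (the correct/incorrect columns in the table).  The standalone userspace sketch below approximates the kernel's definitions from include/linux/compiler.h to show what the removed hints told the compiler; the flag value and the function are made up for illustration.

        #include <stdio.h>

        /* Userspace stand-ins; the kernel's definitions in
         * include/linux/compiler.h are essentially these. */
        #define likely(x)    __builtin_expect(!!(x), 1)
        #define unlikely(x)  __builtin_expect(!!(x), 0)

        #define WORKER_NOT_RUNNING  0x1   /* made-up value for illustration */

        static int is_running(unsigned int flags)
        {
                /* The removed unlikely() promised gcc this branch is rare;
                 * the profile showed it is taken ~99% of the time, so the
                 * hint laid out the common case as the out-of-line path. */
                if (unlikely(flags & WORKER_NOT_RUNNING))
                        return 0;
                return 1;
        }

        int main(void)
        {
                printf("running: %d\n", is_running(WORKER_NOT_RUNNING));
                return 0;
        }

With no annotation at all, gcc falls back to its own heuristics, which is exactly what the patch asks for.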
From c8efcc2589464ac70255bb83e10cad61c7c6d295 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 20 Dec 2010 19:32:04 +0100
Subject: workqueue: allow chained queueing during destruction

Currently, destroy_workqueue() makes the workqueue deny all new
queueing by setting WQ_DYING and flushes the workqueue once before
proceeding with destruction; however, there are cases where work items
queue more related work items.  Currently, such users need to
explicitly flush the workqueue multiple times depending on the
possible depth of such chained queueing.

This patch updates the queueing path such that a work item can queue
further work items on the same workqueue even when WQ_DYING is set.
The flush on destruction is automatically retried until the workqueue
is empty.  This guarantees that the workqueue is empty on destruction
while allowing chained queueing.

The flush retry logic whines if it takes too many retries to drain the
workqueue.

Signed-off-by: Tejun Heo
Cc: James Bottomley
---
 kernel/workqueue.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e785b0f2aea5..8ee6ec82f88a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -932,6 +932,38 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
         wake_up_worker(gcwq);
 }

+/*
+ * Test whether @work is being queued from another work executing on the
+ * same workqueue.  This is rather expensive and should only be used from
+ * cold paths.
+ */
+static bool is_chained_work(struct workqueue_struct *wq)
+{
+        unsigned long flags;
+        unsigned int cpu;
+
+        for_each_gcwq_cpu(cpu) {
+                struct global_cwq *gcwq = get_gcwq(cpu);
+                struct worker *worker;
+                struct hlist_node *pos;
+                int i;
+
+                spin_lock_irqsave(&gcwq->lock, flags);
+                for_each_busy_worker(worker, i, pos, gcwq) {
+                        if (worker->task != current)
+                                continue;
+                        spin_unlock_irqrestore(&gcwq->lock, flags);
+                        /*
+                         * I'm @worker, no locking necessary.  See if @work
+                         * is headed to the same workqueue.
+                         */
+                        return worker->current_cwq->wq == wq;
+                }
+                spin_unlock_irqrestore(&gcwq->lock, flags);
+        }
+        return false;
+}
+
 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
                          struct work_struct *work)
 {
@@ -943,7 +975,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,

         debug_work_activate(work);

-        if (WARN_ON_ONCE(wq->flags & WQ_DYING))
+        /* if dying, only works from the same workqueue are allowed */
+        if (unlikely(wq->flags & WQ_DYING) &&
+            WARN_ON_ONCE(!is_chained_work(wq)))
                 return;

         /* determine gcwq to use */
@@ -2936,11 +2970,35 @@ EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
  */
 void destroy_workqueue(struct workqueue_struct *wq)
 {
+        unsigned int flush_cnt = 0;
         unsigned int cpu;

+        /*
+         * Mark @wq dying and drain all pending works.  Once WQ_DYING is
+         * set, only chain queueing is allowed.  IOW, only currently
+         * pending or running work items on @wq can queue further work
+         * items on it.  @wq is flushed repeatedly until it becomes empty.
+         * The number of flushing is detemined by the depth of chaining and
+         * should be relatively short.  Whine if it takes too long.
+         */
         wq->flags |= WQ_DYING;
+reflush:
         flush_workqueue(wq);

+        for_each_cwq_cpu(cpu, wq) {
+                struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+
+                if (!cwq->nr_active && list_empty(&cwq->delayed_works))
+                        continue;
+
+                if (++flush_cnt == 10 ||
+                    (flush_cnt % 100 == 0 && flush_cnt <= 1000))
+                        printk(KERN_WARNING "workqueue %s: flush on "
+                               "destruction isn't complete after %u tries\n",
+                               wq->name, flush_cnt);
+                goto reflush;
+        }
+
         /*
          * wq list is used to freeze wq, remove from list after
          * flushing is complete in case freeze races us.
--
cgit v1.2.3
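To illustrate the behaviour the patch enables, here is a rough sketch of a module whose work item queues a follow-up item on the same workqueue while it runs.  With this patch such chained queueing stays legal after destroy_workqueue() sets WQ_DYING, and the repeated flush in destroy_workqueue() drains the whole chain.  All names below are invented for the example and error handling is minimal.

        #include <linux/module.h>
        #include <linux/workqueue.h>

        /* Invented names; not part of the patch. */
        static struct workqueue_struct *chain_wq;
        static struct work_struct first_work, second_work;

        static void second_fn(struct work_struct *work)
        {
                pr_info("chained work ran\n");
        }

        static void first_fn(struct work_struct *work)
        {
                /*
                 * Chained queueing: this runs on chain_wq and queues more
                 * work on the same workqueue.  Even after destroy_workqueue()
                 * has set WQ_DYING, is_chained_work() sees that the queueing
                 * comes from a worker of chain_wq and lets it through.
                 */
                queue_work(chain_wq, &second_work);
        }

        static int __init chain_init(void)
        {
                chain_wq = alloc_workqueue("chain_example", 0, 0);
                if (!chain_wq)
                        return -ENOMEM;

                INIT_WORK(&first_work, first_fn);
                INIT_WORK(&second_work, second_fn);
                queue_work(chain_wq, &first_work);
                return 0;
        }

        static void __exit chain_exit(void)
        {
                /* Flushes repeatedly until the chain has drained, then
                 * destroys the workqueue. */
                destroy_workqueue(chain_wq);
        }

        module_init(chain_init);
        module_exit(chain_exit);
        MODULE_LICENSE("GPL");

Without the patch, a user like this would have to call flush_workqueue() enough times to drain every level of chaining before calling destroy_workqueue(), as the commit message notes; otherwise the chained queue_work() would hit the WARN_ON_ONCE() in __queue_work() and be dropped.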