From 84f9f938be4156e4baea466688bd6abae1c9e6ba Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben@bwidawsk.net>
Date: Mon, 12 Dec 2011 19:21:58 -0800
Subject: drm/i915: Force sync command ordering (Gen6+)

The docs say this is required for Gen7, and since the bit was added for
Gen6, we are also setting it there pit pf paranoia. Particularly as
Chris points out, if PIPE_CONTROL counts as a 3d state packet.

This was found through doc inspection by Ken and applies to Gen6+;

Reported-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Keith Packard <keithp@keithp.com>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ca70e2f10445..f5dae5deca71 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -414,6 +414,11 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 			return ret;
 	}
 
+	if (INTEL_INFO(dev)->gen >= 6) {
+		I915_WRITE(INSTPM,
+			   INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING);
+	}
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From 42ff6572e5a4a7414330a4ca91f0335da67deca9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Thu, 22 Dec 2011 14:55:00 -0800
Subject: drm/i915: Work around gen7 BLT ring synchronization issues.

Previous to this commit, testing easily reproduced a failure where the
seqno would apparently arrive after the IRQ associated with it, with test programs as simple as:

for (;;) {
    glCopyPixels(0, 0, 1, 1);
    glFinish();
}

Various workarounds we've seen for previous generations didn't work to
fix this issue, so until new information comes in, replace the IRQ
waits on the BLT ring with polling.

Signed-off-by: Eric Anholt <eric@anholt.net>
Tested-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Keith Packard <keithp@keithp.com>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f5dae5deca71..d0eb2280d8d9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -791,6 +791,17 @@ ring_add_request(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+static bool
+gen7_blt_ring_get_irq(struct intel_ring_buffer *ring)
+{
+	/* The BLT ring on IVB appears to have broken synchronization
+	 * between the seqno write and the interrupt, so that the
+	 * interrupt appears first.  Returning false here makes
+	 * i915_wait_request() do a polling loop, instead.
+	 */
+	return false;
+}
+
 static bool
 gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
 {
@@ -1557,5 +1568,8 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 
 	*ring = gen6_blt_ring;
 
+	if (IS_GEN7(dev))
+		ring->irq_get = gen7_blt_ring_get_irq;
+
 	return intel_init_ring_buffer(dev, ring);
 }
-- 
cgit v1.2.3


From e6bfaf854272ec4641a9ef7b1cb1ca963031ba95 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 14 Dec 2011 13:56:59 +0100
Subject: drm/i915: don't bail out of intel_wait_ring_buffer too early

In the pre-gem days with non-existing hangcheck and gpu reset code,
this timeout of 3 seconds was pretty important to avoid stuck
processes.

But now we have the hangcheck code in gem that goes to great length
to ensure that the gpu is really dead before declaring it wedged.

So there's no need for this timeout anymore. Actually it's even harmful
because we can bail out too early (e.g. with xscreensaver slip)
when running giant batchbuffers. And our code isn't robust enough
to properly unroll any state-changes, we pretty much rely on the gpu
reset code cleaning up the mess (like cache tracking, fencing state,
active list/request tracking, ...).

With this change intel_begin_ring can only fail when the gpu is
wedged, and it will return -EAGAIN (like wait_request in case the
gpu reset is still outstanding).

v2: Chris Wilson noted that on resume timers aren't running and hence
we won't ever get kicked out of this loop by the hangcheck code. Use
an insanely large timeout instead for the HAS_GEM case to prevent
resume bugs from totally hanging the machine.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-off-by: Keith Packard <keithp@keithp.com>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d0eb2280d8d9..77e729d4e4f0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1135,7 +1135,16 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
 	}
 
 	trace_i915_ring_wait_begin(ring);
-	end = jiffies + 3 * HZ;
+	if (drm_core_check_feature(dev, DRIVER_GEM))
+		/* With GEM the hangcheck timer should kick us out of the loop,
+		 * leaving it early runs the risk of corrupting GEM state (due
+		 * to running on almost untested codepaths). But on resume
+		 * timers don't work yet, so prevent a complete hang in that
+		 * case by choosing an insanely large timeout. */
+		end = jiffies + 60 * HZ;
+	else
+		end = jiffies + 3 * HZ;
+
 	do {
 		ring->head = I915_READ_HEAD(ring);
 		ring->space = ring_space(ring);
-- 
cgit v1.2.3


From 4cd53c0c8b01fc05c3ad5b2acdad02e37d3c2f55 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 14 Dec 2012 16:01:25 +0100
Subject: drm/i915: paper over missed irq issues with force wake voodoo

Two things seem to do the trick on my ivb machine here:
- prevent the gt from powering down while waiting for seqno
  notification interrupts by grabbing the force_wake in get_irq (and
  dropping it in put_irq again).
- ordering writes from the ring's CS by reading a CS register, ACTHD
  seems to work.

Only the blt&bsd ring on ivb seem to be massively affected by this,
but for paranoia do this dance also on the render ring and on snb
(i.e. all gpus with forcewake).

Tested with Eric's glCopyPixels loop which without this patch scores a
missed irq every few seconds.

This patch needs my forcewake rework to use a spinlock instead of
dev->struct_mutex.

After crawling through docs a lot I've found the following nugget:

Internal doc "SNB GT PM Programming Guide", Section 4.3.1:

"GT does not generate interrupts while in RC6 (by design)"

So it looks like rc6 and irq generation are indeed related.

v2: Improve the comment per Eugeni Dodonov's suggestion.

v3: Add the documentation snipped. Also restrict the w/a to ivb only
for -fixes, as suggested by Keith Packard.

Cc: stable@kernel.org
Cc: Eric Anholt <eric@anholt.net>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Eugeni Dodonov <eugeni.dodonov@intel.com>
Tested-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Reviewed-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Keith Packard <keithp@keithp.com>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 77e729d4e4f0..fa5702c5da42 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -635,6 +635,19 @@ render_ring_add_request(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+static u32
+gen6_ring_get_seqno(struct intel_ring_buffer *ring)
+{
+	struct drm_device *dev = ring->dev;
+
+	/* Workaround to force correct ordering between irq and seqno writes on
+	 * ivb (and maybe also on snb) by reading from a CS register (like
+	 * ACTHD) before reading the status page. */
+	if (IS_GEN7(dev))
+		intel_ring_get_active_head(ring);
+	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
+}
+
 static u32
 ring_get_seqno(struct intel_ring_buffer *ring)
 {
@@ -811,6 +824,12 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
 	if (!dev->irq_enabled)
 	       return false;
 
+	/* It looks like we need to prevent the gt from suspending while waiting
+	 * for an notifiy irq, otherwise irqs seem to get lost on at least the
+	 * blt/bsd rings on ivb. */
+	if (IS_GEN7(dev))
+		gen6_gt_force_wake_get(dev_priv);
+
 	spin_lock(&ring->irq_lock);
 	if (ring->irq_refcount++ == 0) {
 		ring->irq_mask &= ~rflag;
@@ -835,6 +854,9 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
 		ironlake_disable_irq(dev_priv, gflag);
 	}
 	spin_unlock(&ring->irq_lock);
+
+	if (IS_GEN7(dev))
+		gen6_gt_force_wake_put(dev_priv);
 }
 
 static bool
@@ -1341,7 +1363,7 @@ static const struct intel_ring_buffer gen6_bsd_ring = {
 	.write_tail		= gen6_bsd_ring_write_tail,
 	.flush			= gen6_ring_flush,
 	.add_request		= gen6_add_request,
-	.get_seqno		= ring_get_seqno,
+	.get_seqno		= gen6_ring_get_seqno,
 	.irq_get		= gen6_bsd_ring_get_irq,
 	.irq_put		= gen6_bsd_ring_put_irq,
 	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
@@ -1476,7 +1498,7 @@ static const struct intel_ring_buffer gen6_blt_ring = {
 	.write_tail		= ring_write_tail,
 	.flush			= blt_ring_flush,
 	.add_request		= gen6_add_request,
-	.get_seqno		= ring_get_seqno,
+	.get_seqno		= gen6_ring_get_seqno,
 	.irq_get		= blt_ring_get_irq,
 	.irq_put		= blt_ring_put_irq,
 	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
@@ -1499,6 +1521,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->flush = gen6_render_ring_flush;
 		ring->irq_get = gen6_render_ring_get_irq;
 		ring->irq_put = gen6_render_ring_put_irq;
+		ring->get_seqno = gen6_ring_get_seqno;
 	} else if (IS_GEN5(dev)) {
 		ring->add_request = pc_render_add_request;
 		ring->get_seqno = pc_render_get_seqno;
-- 
cgit v1.2.3


From 8f0fc977f58c36e75e205486c1aebb9b8e4263e1 Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@keithp.com>
Date: Thu, 19 Jan 2012 21:13:47 -0800
Subject: Revert "drm/i915: Work around gen7 BLT ring synchronization issues."

This reverts commit 42ff6572e5a4a7414330a4ca91f0335da67deca9.

New forcewake voodoo makes this no longer necessary.

Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Keith Packard <keithp@keithp.com>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index fa5702c5da42..1ab842c6032e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -804,17 +804,6 @@ ring_add_request(struct intel_ring_buffer *ring,
 	return 0;
 }
 
-static bool
-gen7_blt_ring_get_irq(struct intel_ring_buffer *ring)
-{
-	/* The BLT ring on IVB appears to have broken synchronization
-	 * between the seqno write and the interrupt, so that the
-	 * interrupt appears first.  Returning false here makes
-	 * i915_wait_request() do a polling loop, instead.
-	 */
-	return false;
-}
-
 static bool
 gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
 {
@@ -1600,8 +1589,5 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 
 	*ring = gen6_blt_ring;
 
-	if (IS_GEN7(dev))
-		ring->irq_get = gen7_blt_ring_get_irq;
-
 	return intel_init_ring_buffer(dev, ring);
 }
-- 
cgit v1.2.3


From 5d031e5b633d910f35e6e0abce94d9d842390006 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 8 Feb 2012 13:34:13 +0000
Subject: drm/i915: Remove use of the autoreported ringbuffer HEAD position

This is a revert of 6aa56062eaba67adfb247cded244fd877329588d.

This was originally introduced to workaround reads of the ringbuffer
registers returning 0 on SandyBridge causing hangs due to ringbuffer
overflow. The root cause here was reads through the GT powerwell require
the forcewake dance, something we only learnt of later. Now it appears
that reading the reported head position from the HWS is returning
garbage, leading once again to hangs.

For example, on q35 the autoreported head reports:
  [  217.975608] head now 00010000, actual 00010000
  [  436.725613] head now 00200000, actual 00200000
  [  462.956033] head now 00210000, actual 00210010
  [  485.501409] head now 00400000, actual 00400020
  [  508.064280] head now 00410000, actual 00410000
  [  530.576078] head now 00600000, actual 00600020
  [  553.273489] head now 00610000, actual 00610018
which appears reasonably sane. In contrast, if we look at snb:
  [  141.970680] head now 00e10000, actual 00008238
  [  141.974062] head now 02734000, actual 000083c8
  [  141.974425] head now 00e10000, actual 00008488
  [  141.980374] head now 032b5000, actual 000088b8
  [  141.980885] head now 03271000, actual 00008950
  [  142.040628] head now 02101000, actual 00008b40
  [  142.180173] head now 02734000, actual 00009050
  [  142.181090] head now 00000000, actual 00000ae0
  [  142.183737] head now 02734000, actual 00009050

In addition, the automatic reporting of the head position is scheduled
to be defeatured in the future. It has no more utility, remove it.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=45492
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Tested-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

(limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1ab842c6032e..536191540b03 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -301,7 +301,7 @@ static int init_ring_common(struct intel_ring_buffer *ring)
 
 	I915_WRITE_CTL(ring,
 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
-			| RING_REPORT_64K | RING_VALID);
+			| RING_VALID);
 
 	/* If the head is still not zero, the ring is dead */
 	if ((I915_READ_CTL(ring) & RING_VALID) == 0 ||
@@ -1132,18 +1132,6 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long end;
-	u32 head;
-
-	/* If the reported head position has wrapped or hasn't advanced,
-	 * fallback to the slow and accurate path.
-	 */
-	head = intel_read_status_page(ring, 4);
-	if (head > ring->head) {
-		ring->head = head;
-		ring->space = ring_space(ring);
-		if (ring->space >= n)
-			return 0;
-	}
 
 	trace_i915_ring_wait_begin(ring);
 	if (drm_core_check_feature(dev, DRIVER_GEM))
-- 
cgit v1.2.3