From 403bdd10c815029694046adf5ffde0577cbd2866 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 7 Aug 2014 16:05:39 +0200 Subject: drm/i915: No busy-loop wait_for in the ring init code Doing a 1s wait (tops) with the cpu is a bit excessive. Tune it down like everything else in that code. v2: Also insert the missing space Chris spotted. Cc: Naresh Kumar Kachhi Cc: Chris Wilson Acked-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 16371a444426..117543e58d48 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -476,8 +476,8 @@ static bool stop_ring(struct intel_engine_cs *ring) if (!IS_GEN2(ring->dev)) { I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); - if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { - DRM_ERROR("%s :timed out trying to stop ring\n", ring->name); + if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { + DRM_ERROR("%s : timed out trying to stop ring\n", ring->name); return false; } } -- cgit v1.2.3 From 9bec9b1334d687c0a9fcf3d3a1987a61b4826a45 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Aug 2014 09:21:35 +0100 Subject: drm/i915: Double check ring is idle before declaring the GPU wedged During ring initialisation, sometimes we observe, though not in production hardware, that the idle flag is not set even though the ring is empty. Double check before giving up. Signed-off-by: Chris Wilson Cc: Damien Lespiau Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 117543e58d48..a059b64a0fb2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -478,7 +478,12 @@ static bool stop_ring(struct intel_engine_cs *ring) I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { DRM_ERROR("%s : timed out trying to stop ring\n", ring->name); - return false; + /* Sometimes we observe that the idle flag is not + * set even though the ring is empty. So double + * check before giving up. + */ + if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring)) + return false; } } -- cgit v1.2.3 From 84c2377fcee7a43cd964b62143e9a3714130bb0c Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:15 +0100 Subject: drm/i915/bdw: Allocate ringbuffers for Logical Ring Contexts As we have said a couple of times by now, logical ring contexts have their own ringbuffers: not only the backing pages, but the whole management struct. In a previous version of the series, this was achieved with two separate patches: drm/i915/bdw: Allocate ringbuffer backing objects for default global LRC drm/i915/bdw: Allocate ringbuffer for user-created LRCs Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a059b64a0fb2..064652034d7e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1519,7 +1519,7 @@ static int init_phys_status_page(struct intel_engine_cs *ring) return 0; } -static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) { if (!ringbuf->obj) return; @@ -1530,8 +1530,8 @@ static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) ringbuf->obj = NULL; } -static int intel_alloc_ringbuffer_obj(struct drm_device *dev, - struct intel_ringbuffer *ringbuf) +int intel_alloc_ringbuffer_obj(struct drm_device *dev, + struct intel_ringbuffer *ringbuf) { struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; -- cgit v1.2.3 From 0c7dd53b84def4fbbba907bef3d32a5171b617a5 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 11 Aug 2014 16:17:44 +0200 Subject: drm/i915/bdw: Add a context and an engine pointers to the ringbuffer Any given ringbuffer is unequivocally tied to one context and one engine. By setting the appropriate pointers to them, the ringbuffer struct holds all the infromation you might need to submit a workload for processing, Execlists style. v2: Drop ring->ctx since that looks terribly ill-defined for legacy ringbuffer submission. Signed-off-by: Oscar Mateo (v1) Acked-by: Damien Lespiau (v2) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 064652034d7e..c35f956ed6a0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1594,6 +1594,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); ringbuf->size = 32 * PAGE_SIZE; + ringbuf->ring = ring; memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); init_waitqueue_head(&ring->irq_queue); -- cgit v1.2.3 From 48d823878d64f93163f5a949623346748bbce1b4 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:23 +0100 Subject: drm/i915/bdw: Generic logical ring init and cleanup Allocate and populate the default LRC for every ring, call gen-specific init/cleanup, init/fini the command parser and set the status page (now inside the LRC object). These are things all engines/rings have in common. Stopping the ring before cleanup and initializing the seqnos is left as a TODO task (we need more infrastructure in place before we can achieve this). v2: Check the ringbuffer backing obj for ring_is_initialized, instead of the context backing obj (similar, but not exactly the same). Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c35f956ed6a0..e4b97f5c5797 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -40,6 +40,23 @@ */ #define CACHELINE_BYTES 64 +bool +intel_ring_initialized(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + + if (!dev) + return false; + + if (i915.enable_execlists) { + struct intel_context *dctx = ring->default_context; + struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf; + + return ringbuf->obj; + } else + return ring->buffer && ring->buffer->obj; +} + static inline int __ring_space(int head, int tail, int size) { int space = head - (tail + I915_RING_FREE_SPACE); -- cgit v1.2.3 From 9b1136d505b1de5478e11b59ca59cf8ce2a33217 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:24 +0100 Subject: drm/i915/bdw: GEN-specific logical ring init Logical rings do not need most of the initialization their legacy ringbuffer counterparts do: we just need the pipe control object for the render ring, enable Execlists on the hardware and a few workarounds. v2: Squash with: "drm/i915: Extract pipe control fini & make init outside accesible". Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Make checkpatch happy.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 34 ++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e4b97f5c5797..dab5e7c79036 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -597,8 +597,25 @@ out: return ret; } -static int -init_pipe_control(struct intel_engine_cs *ring) +void +intel_fini_pipe_control(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + + if (ring->scratch.obj == NULL) + return; + + if (INTEL_INFO(dev)->gen >= 5) { + kunmap(sg_page(ring->scratch.obj->pages->sgl)); + i915_gem_object_ggtt_unpin(ring->scratch.obj); + } + + drm_gem_object_unreference(&ring->scratch.obj->base); + ring->scratch.obj = NULL; +} + +int +intel_init_pipe_control(struct intel_engine_cs *ring) { int ret; @@ -673,7 +690,7 @@ static int init_render_ring(struct intel_engine_cs *ring) _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); if (INTEL_INFO(dev)->gen >= 5) { - ret = init_pipe_control(ring); + ret = intel_init_pipe_control(ring); if (ret) return ret; } @@ -708,16 +725,7 @@ static void render_ring_cleanup(struct intel_engine_cs *ring) dev_priv->semaphore_obj = NULL; } - if (ring->scratch.obj == NULL) - return; - - if (INTEL_INFO(dev)->gen >= 5) { - kunmap(sg_page(ring->scratch.obj->pages->sgl)); - i915_gem_object_ggtt_unpin(ring->scratch.obj); - } - - drm_gem_object_unreference(&ring->scratch.obj->base); - ring->scratch.obj = NULL; + intel_fini_pipe_control(ring); } static int gen8_rcs_signal(struct intel_engine_cs *signaller, -- cgit v1.2.3 From 82e104cc266c6da30a30fc5028b2f0236c669cd7 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:26 +0100 Subject: drm/i915/bdw: New logical ring submission mechanism Well, new-ish: if all this code looks familiar, that's because it's a clone of the existing submission mechanism (with some modifications here and there to adapt it to LRCs and Execlists). And why did we do this instead of reusing code, one might wonder? Well, there are some fears that the differences are big enough that they will end up breaking all platforms. Also, Execlists offer several advantages, like control over when the GPU is done with a given workload, that can help simplify the submission mechanism, no doubt. I am interested in getting Execlists to work first and foremost, but in the future this parallel submission mechanism will help us to fine tune the mechanism without affecting old gens. v2: Pass the ringbuffer only (whenever possible). Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Appease checkpatch. Again. And drop the legacy sarea gunk that somehow crept in.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index dab5e7c79036..0bfa018fab20 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -57,7 +57,7 @@ intel_ring_initialized(struct intel_engine_cs *ring) return ring->buffer && ring->buffer->obj; } -static inline int __ring_space(int head, int tail, int size) +int __intel_ring_space(int head, int tail, int size) { int space = head - (tail + I915_RING_FREE_SPACE); if (space < 0) @@ -65,12 +65,13 @@ static inline int __ring_space(int head, int tail, int size) return space; } -static inline int ring_space(struct intel_ringbuffer *ringbuf) +int intel_ring_space(struct intel_ringbuffer *ringbuf) { - return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size); + return __intel_ring_space(ringbuf->head & HEAD_ADDR, + ringbuf->tail, ringbuf->size); } -static bool intel_ring_stopped(struct intel_engine_cs *ring) +bool intel_ring_stopped(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring->dev->dev_private; return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring); @@ -585,7 +586,7 @@ static int init_ring_common(struct intel_engine_cs *ring) else { ringbuf->head = I915_READ_HEAD(ring); ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); ringbuf->last_retired_head = -1; } @@ -1702,13 +1703,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n) ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); if (ringbuf->space >= n) return 0; } list_for_each_entry(request, &ring->request_list, list) { - if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) { + if (__intel_ring_space(request->tail, ringbuf->tail, + ringbuf->size) >= n) { seqno = request->seqno; break; } @@ -1725,7 +1727,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n) ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); return 0; } @@ -1754,7 +1756,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n) trace_i915_ring_wait_begin(ring); do { ringbuf->head = I915_READ_HEAD(ring); - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); if (ringbuf->space >= n) { ret = 0; break; @@ -1806,7 +1808,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring) iowrite32(MI_NOOP, virt++); ringbuf->tail = 0; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); return 0; } -- cgit v1.2.3 From 4712274c362b7730a1c6e01c9a51a6d46f5b7f43 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:28 +0100 Subject: drm/i915/bdw: GEN-specific logical ring emit flush Same as the legacy-style ring->flush. v2: The BSD invalidate bit still exists in GEN8! Add it for the VCS rings (but still consolidate the blt and bsd ring flushes into one). This was noticed by Brad Volkin. v3: The command for BSD and for other rings is slightly different: get it exactly the same as in gen6_ring_flush + gen6_bsd_ring_flush Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0bfa018fab20..4236014c1cda 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,13 +33,6 @@ #include "i915_trace.h" #include "intel_drv.h" -/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, - * but keeps the logic simple. Indeed, the whole purpose of this macro is just - * to give some inclination as to some of the magic values used in the various - * workarounds! - */ -#define CACHELINE_BYTES 64 - bool intel_ring_initialized(struct intel_engine_cs *ring) { -- cgit v1.2.3 From 896ab1a5d54269b463a24194c2e4a369103b46d8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:51 +0200 Subject: drm/i915: Fix up checks for aliasing ppgtt A subsequent patch will no longer initialize the aliasing ppgtt if we have full ppgtt enabled, since we simply don't need that any more. Unfortunately a few places check for the aliasing ppgtt instead of checking for ppgtt in general. Fix them up. One special case are the gtt offset and size macros, which have some code to remap the aliasing ppgtt to the global gtt. The aliasing ppgtt is _not_ a logical address space, so passing that in as the vm is plain and simple a bug. So just WARN about it and carry on - we have a gracefully fall-through anyway if we can't find the vma. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4236014c1cda..13543f8528c2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2006,9 +2006,7 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, u64 offset, u32 len, unsigned flags) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; - bool ppgtt = dev_priv->mm.aliasing_ppgtt != NULL && - !(flags & I915_DISPATCH_SECURE); + bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE); int ret; ret = intel_ring_begin(ring, 4); -- cgit v1.2.3 From cc9130be805d955f0e06642e57741dd9df1fbc86 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:42 +0100 Subject: drm/i915/bdw: Make sure gpu reset still works with Execlists If we reset a ring after a hang, we have to make sure that we clear out all queued Execlists requests. v2: The ring is, at this point, already being correctly re-programmed for Execlists, and the hangcheck counters cleared. v3: Daniel suggests to drop the "if (execlists)" because the Execlists queue should be empty in legacy mode (which is true, if we do the INIT_LIST_HEAD). v4: Do the pending intel_runtime_pm_put Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 13543f8528c2..4fb1ec95ec08 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1612,6 +1612,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, ring->dev = dev; INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); + INIT_LIST_HEAD(&ring->execlist_queue); ringbuf->size = 32 * PAGE_SIZE; ringbuf->ring = ring; memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); -- cgit v1.2.3 From c5ad011d7d256ecbe173324029e992817194d2b0 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 4 Aug 2014 03:51:38 -0700 Subject: drm/i915: FBC flush nuke for BDW According to spec FBC on BDW and HSW are identical without any gaps. So let's copy the nuke and let FBC really start compressing stuff. Without this patch we can verify with false color that nothing is being compressed. With the nuke in place and false color it is possible to see false color debugs. Unfortunatelly on some rings like BCS on BDW we have to avoid Bits 22:18 on LRIs due to a high risk of hung. So, when using Blt ring for frontbuffer rend cache would never been cleaned and FBC would stop compressing buffer. One alternative is to cache clean on software frontbuffer tracking. v2: Fix rebase conflict. v3: Do not clean cache on BCS ring. Instead use sw frontbuffer tracking. Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4fb1ec95ec08..de7654623acc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -444,7 +444,14 @@ gen8_render_ring_flush(struct intel_engine_cs *ring, return ret; } - return gen8_emit_pipe_control(ring, flags, scratch_addr); + ret = gen8_emit_pipe_control(ring, flags, scratch_addr); + if (ret) + return ret; + + if (!invalidate_domains && flush_domains) + return gen7_ring_fbc_flush(ring, FBC_REND_NUKE); + + return 0; } static void ring_write_tail(struct intel_engine_cs *ring, -- cgit v1.2.3 From 86d7f23842f1bce3ab5e8c8d0c676112bbc4c99b Mon Sep 17 00:00:00 2001 From: Arun Siluvery Date: Tue, 26 Aug 2014 14:44:50 +0100 Subject: drm/i915/bdw: Apply workarounds in render ring init function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For BDW workarounds are currently initialized in init_clock_gating() but they are lost during reset, suspend/resume etc; this patch moves the WAs that are part of register state context to render ring init fn otherwise default context ends up with incorrect values as they don't get initialized until init_clock_gating fn. v2: Add workarounds to golden render state This method has its own issues, first of all this is different for each gen and it is generated using a tool so adding new workaround and mainitaining them across gens is not a straightforward process. v3: Use LRIs to emit these workarounds (Ville) Instead of modifying the golden render state the same LRIs are emitted from within the driver. v4: Use abstract name when exporting gen specific routines (Chris) For: VIZ-4092 Signed-off-by: Arun Siluvery Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 79 +++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index de7654623acc..1d5bfdb4fe97 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -657,6 +657,84 @@ err: return ret; } +static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, + u32 addr, u32 value) +{ + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, addr); + intel_ring_emit(ring, value); +} + +static int gen8_init_workarounds(struct intel_engine_cs *ring) +{ + int ret; + + /* + * workarounds applied in this fn are part of register state context, + * they need to be re-initialized followed by gpu reset, suspend/resume, + * module reload. + */ + + /* + * update the number of dwords required based on the + * actual number of workarounds applied + */ + ret = intel_ring_begin(ring, 24); + if (ret) + return ret; + + /* WaDisablePartialInstShootdown:bdw */ + /* WaDisableThreadStallDopClockGating:bdw */ + /* FIXME: Unclear whether we really need this on production bdw. */ + intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE + | STALL_DOP_GATING_DISABLE)); + + /* WaDisableDopClockGating:bdw May not be needed for production */ + intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + + /* + * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for + * pre-production hardware + */ + intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, + _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS + | GEN8_SAMPLER_POWER_BYPASS_DIS)); + + intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1, + _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); + + intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2, + _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); + + /* Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + intel_ring_emit_wa(ring, HDC_CHICKEN0, + _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); + + /* Wa4x4STCOptimizationDisable:bdw */ + intel_ring_emit_wa(ring, CACHE_MODE_1, + _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + intel_ring_emit_wa(ring, GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + + intel_ring_advance(ring); + + return 0; +} + static int init_render_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; @@ -2143,6 +2221,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) dev_priv->semaphore_obj = obj; } } + ring->init_context = gen8_init_workarounds; ring->add_request = gen6_add_request; ring->flush = gen8_render_ring_flush; ring->irq_get = gen8_ring_get_irq; -- cgit v1.2.3 From 888b59951ed5ac450fe3ddd7b3937d905b9bafbc Mon Sep 17 00:00:00 2001 From: Arun Siluvery Date: Tue, 26 Aug 2014 14:44:51 +0100 Subject: drm/i915/bdw: Export workaround data to debugfs The workarounds that are applied are exported to a debugfs file; this is used to verify their state after the test case (reset or suspend/resume etc). This patch is only required to support i-g-t. Signed-off-by: Arun Siluvery Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1d5bfdb4fe97..14b517d61b4e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -660,20 +660,40 @@ err: static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, u32 addr, u32 value) { + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + if (dev_priv->num_wa_regs > I915_MAX_WA_REGS) + return; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(ring, addr); intel_ring_emit(ring, value); + + dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr; + dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = (value) & 0xFFFF; + /* value is updated with the status of remaining bits of this + * register when it is read from debugfs file + */ + dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value; + dev_priv->num_wa_regs++; + + return; } static int gen8_init_workarounds(struct intel_engine_cs *ring) { int ret; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; /* * workarounds applied in this fn are part of register state context, * they need to be re-initialized followed by gpu reset, suspend/resume, * module reload. */ + dev_priv->num_wa_regs = 0; + memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); /* * update the number of dwords required based on the @@ -732,6 +752,9 @@ static int gen8_init_workarounds(struct intel_engine_cs *ring) intel_ring_advance(ring); + DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n", + dev_priv->num_wa_regs); + return 0; } -- cgit v1.2.3 From 00e1e623e62cd8452e28633182b91ddcbb70cc7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 27 Aug 2014 17:33:12 +0300 Subject: drm/i915: Init some CHV workarounds via LRIs in ring->init_context() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow the BDW example and apply the workarounds touching registers which are saved in the context image through LRIs in the new ring->init_context() hook. This makes Mesa much happier and eg. glxgears doesn't hang after the first frame. Cc: Arun Siluvery Signed-off-by: Ville Syrjälä [danvet: Add missing wa table initialization to avoid a functional conflict with Arun's wa table debugfs support.] Reviewed-by: "Barbalho, Rafael" Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 46 +++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 14b517d61b4e..12135ef36060 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -681,7 +681,7 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, return; } -static int gen8_init_workarounds(struct intel_engine_cs *ring) +static int bdw_init_workarounds(struct intel_engine_cs *ring) { int ret; struct drm_device *dev = ring->dev; @@ -758,6 +758,45 @@ static int gen8_init_workarounds(struct intel_engine_cs *ring) return 0; } +static int chv_init_workarounds(struct intel_engine_cs *ring) +{ + int ret; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + /* + * workarounds applied in this fn are part of register state context, + * they need to be re-initialized followed by gpu reset, suspend/resume, + * module reload. + */ + dev_priv->num_wa_regs = 0; + memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); + + ret = intel_ring_begin(ring, 12); + if (ret) + return ret; + + /* WaDisablePartialInstShootdown:chv */ + intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); + + /* WaDisableThreadStallDopClockGating:chv */ + intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); + + /* WaDisableDopClockGating:chv (pre-production hw) */ + intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + + /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ + intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, + _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); + + intel_ring_advance(ring); + + return 0; +} + static int init_render_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; @@ -2244,7 +2283,10 @@ int intel_init_render_ring_buffer(struct drm_device *dev) dev_priv->semaphore_obj = obj; } } - ring->init_context = gen8_init_workarounds; + if (IS_CHERRYVIEW(dev)) + ring->init_context = chv_init_workarounds; + else + ring->init_context = bdw_init_workarounds; ring->add_request = gen6_add_request; ring->flush = gen8_render_ring_flush; ring->irq_get = gen8_ring_get_irq; -- cgit v1.2.3 From 55820e1e840def3802fc366607f2b25e31036ab1 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 30 Aug 2014 16:51:00 +0100 Subject: drm/i915: Don't overrun the intel_wa_regs array When entering intel_ring_emit_wa() with num_wa_regs equal to I915_MAX_WA_REGS, we end up indexing the intel_wa_regs array beyond its allocation. Fix the check then. Signed-off-by: Damien Lespiau Reviewed-by: Arun Siluvery Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 12135ef36060..96618c0c5085 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -663,7 +663,7 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - if (dev_priv->num_wa_regs > I915_MAX_WA_REGS) + if (dev_priv->num_wa_regs >= I915_MAX_WA_REGS) return; intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); -- cgit v1.2.3 From 04ad2dc7116347a4219b13935c7569ceaab95155 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 30 Aug 2014 16:51:01 +0100 Subject: drm/i915: Don't silently discard workarounds If we happen to emit more than I915_MAX_WA_REGS workarounds, we will currently discard them, not even emit the LRI. Not really what we want, so warn loudly. Signed-off-by: Damien Lespiau Reviewed-by: Arun Siluvery Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 96618c0c5085..5ef0f994b0bb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -663,7 +663,7 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - if (dev_priv->num_wa_regs >= I915_MAX_WA_REGS) + if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS)) return; intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); -- cgit v1.2.3 From b07ba1dc78a251fc02992a35b0fd8757029566e4 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 30 Aug 2014 16:51:02 +0100 Subject: drm/i915: Remove unneeded brackets Signed-off-by: Damien Lespiau Reviewed-by: Arun Siluvery Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5ef0f994b0bb..f8aadc3c2124 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -671,7 +671,7 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, intel_ring_emit(ring, value); dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr; - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = (value) & 0xFFFF; + dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF; /* value is updated with the status of remaining bits of this * register when it is read from debugfs file */ -- cgit v1.2.3 From 95468892fdfeef6d1004b524e35957629efdbe00 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 7 Aug 2014 15:39:54 +0100 Subject: drm/i915: Reset the HEAD pointer for the ring after writing START MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ville found an old w/a documented for g4x that suggested that we need to reset the HEAD after writing START. This is a useful fixup for some of the g4x ring initialisation woes, but as usual, not all. v2: Do the rewrite unconditionally anyway References: https://bugs.freedesktop.org/show_bug.cgi?id=76554 Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f8aadc3c2124..85fc2b1a124a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -563,6 +563,14 @@ static int init_ring_common(struct intel_engine_cs *ring) * also enforces ordering), otherwise the hw might lose the new ring * register values. */ I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj)); + + /* WaClearRingBufHeadRegAtInit:ctg,elk */ + if (I915_READ_HEAD(ring)) + DRM_DEBUG("%s initialization failed [head=%08x], fudging\n", + ring->name, I915_READ_HEAD(ring)); + I915_WRITE_HEAD(ring, 0); + (void)I915_READ_HEAD(ring); + I915_WRITE_CTL(ring, ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID); -- cgit v1.2.3 From 770722585639bc2da683e72f610d5f614298e415 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 10 Sep 2014 12:18:27 +0100 Subject: drm/i915: HSW always use GGTT selector for secure batches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gen6 and earlier conflate address space selection (ppgtt vs ggtt) with the security bit (i.e. only privileged batches were allowed to run from ggtt). From Haswell only, you are able to select the security bit separate from the address space - and we always requested to use ppgtt. This breaks the golden render state batch execution with full-ppgtt as that is only present in the global GTT and more generally any secure batch that is not colocated in the ppgtt and ggtt. So we need to disable the use of the ppgtt selector bit for secure batches, or else we hang immediately upon boot and thence after every GPU reset... v2: Only HSW differentiates between secure dispatch and ggtt, so simply ignore the differentiation and always use secure==ggtt. Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä [danvet: Rectify commit message as noted by Chris.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 109de2eeb9a8..25795f2efdcb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2203,8 +2203,9 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, return ret; intel_ring_emit(ring, - MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW | - (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW)); + MI_BATCH_BUFFER_START | + (flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); -- cgit v1.2.3 From d37cf5f7e1b315585940a735a8508d955ffc0f16 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 19 Sep 2014 20:05:26 +0300 Subject: drm/i915/bdw: Cleanup pre prod workarounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit as these have been fixed in production hw and hurt performance if applied. v2: adjust requested ring space (Ville) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83482 Tested-by: zhoujian Signed-off-by: Mika Kuoppala Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 25795f2efdcb..6dc981f0671e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -707,7 +707,7 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) * update the number of dwords required based on the * actual number of workarounds applied */ - ret = intel_ring_begin(ring, 24); + ret = intel_ring_begin(ring, 18); if (ret) return ret; @@ -722,19 +722,8 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); - /* - * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for - * pre-production hardware - */ intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS - | GEN8_SAMPLER_POWER_BYPASS_DIS)); - - intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); - - intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2, - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); + _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); /* Use Force Non-Coherent whenever executing a 3D context. This is a * workaround for for a possible hang in the unlikely event a TLB -- cgit v1.2.3