From 101b376d358e2f724db5e0ac4d207079b16c4754 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 9 Oct 2014 07:11:47 -0700 Subject: drm/i915/bdw: Remove BDW preproduction W/As until C stepping. Let's clean this a bit v2: Rebase after other Mika's patch that removed some BDW production workarounds. v3: Removed stepping info. Reviewed-by: Mika Kuoppala Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7c0c28c65cb4..5ebe46a05a05 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -712,13 +712,12 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) return ret; /* WaDisablePartialInstShootdown:bdw */ - /* WaDisableThreadStallDopClockGating:bdw */ - /* FIXME: Unclear whether we really need this on production bdw. */ + /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE | STALL_DOP_GATING_DISABLE)); - /* WaDisableDopClockGating:bdw May not be needed for production */ + /* WaDisableDopClockGating:bdw */ intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); -- cgit v1.2.3 From 7225342ab501befdb64bcec76ded41f5897c0855 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 7 Oct 2014 17:21:26 +0300 Subject: drm/i915: Build workaround list in ring initialization If we build the workaround list in ring initialization and decouple it from the actual writing of values, we gain the ability to decide where and how we want to apply the values. The advantage of this will become more clear when we need to initialize workarounds on older gens where it is not possible to write all the registers through ring LRIs. v2: rebase on newest bdw workarounds Cc: Arun Siluvery Cc: Damien Lespiau Signed-off-by: Mika Kuoppala Reviewed-by: Arun Siluvery [danvet: Resolve tiny conflict in comments and ocd alignments a bit.] [danvet2: Remove bogus force_wake_get call spotted by Paulo and QA.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 184 ++++++++++++++++++-------------- 1 file changed, 104 insertions(+), 80 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5ebe46a05a05..5f935d4dfb6a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -665,79 +665,107 @@ err: return ret; } -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, - u32 addr, u32 value) +static int intel_ring_workarounds_emit(struct intel_engine_cs *ring) { + int ret, i; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_workarounds *w = &dev_priv->workarounds; - if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS)) - return; + if (WARN_ON(w->count == 0)) + return 0; - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit(ring, addr); - intel_ring_emit(ring, value); + ring->gpu_caches_dirty = true; + ret = intel_ring_flush_all_caches(ring); + if (ret) + return ret; - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr; - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF; - /* value is updated with the status of remaining bits of this - * register when it is read from debugfs file - */ - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value; - dev_priv->num_wa_regs++; + ret = intel_ring_begin(ring, w->count * 3); + if (ret) + return ret; + + for (i = 0; i < w->count; i++) { + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, w->reg[i].addr); + intel_ring_emit(ring, w->reg[i].value); + } + + intel_ring_advance(ring); + + ring->gpu_caches_dirty = true; + ret = intel_ring_flush_all_caches(ring); + if (ret) + return ret; + + DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); - return; + return 0; +} + +static int wa_add(struct drm_i915_private *dev_priv, + const u32 addr, const u32 val, const u32 mask) +{ + const u32 idx = dev_priv->workarounds.count; + + if (WARN_ON(idx >= I915_MAX_WA_REGS)) + return -ENOSPC; + + dev_priv->workarounds.reg[idx].addr = addr; + dev_priv->workarounds.reg[idx].value = val; + dev_priv->workarounds.reg[idx].mask = mask; + + dev_priv->workarounds.count++; + + return 0; } +#define WA_REG(addr, val, mask) { \ + const int r = wa_add(dev_priv, (addr), (val), (mask)); \ + if (r) \ + return r; \ + } + +#define WA_SET_BIT_MASKED(addr, mask) \ + WA_REG(addr, _MASKED_BIT_ENABLE(mask), (mask) & 0xffff) + +#define WA_CLR_BIT_MASKED(addr, mask) \ + WA_REG(addr, _MASKED_BIT_DISABLE(mask), (mask) & 0xffff) + +#define WA_SET_BIT(addr, mask) WA_REG(addr, I915_READ(addr) | (mask), mask) +#define WA_CLR_BIT(addr, mask) WA_REG(addr, I915_READ(addr) & ~(mask), mask) + +#define WA_WRITE(addr, val) WA_REG(addr, val, 0xffffffff) + static int bdw_init_workarounds(struct intel_engine_cs *ring) { - int ret; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - /* - * workarounds applied in this fn are part of register state context, - * they need to be re-initialized followed by gpu reset, suspend/resume, - * module reload. - */ - dev_priv->num_wa_regs = 0; - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); - - /* - * update the number of dwords required based on the - * actual number of workarounds applied - */ - ret = intel_ring_begin(ring, 18); - if (ret) - return ret; - /* WaDisablePartialInstShootdown:bdw */ /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE - | STALL_DOP_GATING_DISABLE)); + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE | + STALL_DOP_GATING_DISABLE); /* WaDisableDopClockGating:bdw */ - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); /* Use Force Non-Coherent whenever executing a 3D context. This is a * workaround for for a possible hang in the unlikely event a TLB * invalidation occurs during a PSD flush. */ /* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */ - intel_ring_emit_wa(ring, HDC_CHICKEN0, - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT | - (IS_BDW_GT3(dev) ? - HDC_FENCE_DEST_SLM_DISABLE : 0) - )); + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT | + (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); /* Wa4x4STCOptimizationDisable:bdw */ - intel_ring_emit_wa(ring, CACHE_MODE_1, - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); + WA_SET_BIT_MASKED(CACHE_MODE_1, + GEN8_4x4_STC_OPTIMIZATION_DISABLE); /* * BSpec recommends 8x4 when MSAA is used, @@ -747,52 +775,50 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ - intel_ring_emit_wa(ring, GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); - - intel_ring_advance(ring); - - DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n", - dev_priv->num_wa_regs); + WA_SET_BIT_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); return 0; } static int chv_init_workarounds(struct intel_engine_cs *ring) { - int ret; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; - /* - * workarounds applied in this fn are part of register state context, - * they need to be re-initialized followed by gpu reset, suspend/resume, - * module reload. - */ - dev_priv->num_wa_regs = 0; - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); - - ret = intel_ring_begin(ring, 12); - if (ret) - return ret; - /* WaDisablePartialInstShootdown:chv */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* WaDisableThreadStallDopClockGating:chv */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); /* WaDisableDopClockGating:chv (pre-production hw) */ - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); - intel_ring_advance(ring); + return 0; +} + +static int init_workarounds_ring(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + WARN_ON(ring->id != RCS); + + dev_priv->workarounds.count = 0; + + if (IS_BROADWELL(dev)) + return bdw_init_workarounds(ring); + + if (IS_CHERRYVIEW(dev)) + return chv_init_workarounds(ring); return 0; } @@ -852,7 +878,7 @@ static int init_render_ring(struct intel_engine_cs *ring) if (HAS_L3_DPF(dev)) I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); - return ret; + return init_workarounds_ring(ring); } static void render_ring_cleanup(struct intel_engine_cs *ring) @@ -2298,10 +2324,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev) dev_priv->semaphore_obj = obj; } } - if (IS_CHERRYVIEW(dev)) - ring->init_context = chv_init_workarounds; - else - ring->init_context = bdw_init_workarounds; + + ring->init_context = intel_ring_workarounds_emit; ring->add_request = gen6_add_request; ring->flush = gen8_render_ring_flush; ring->irq_get = gen8_ring_get_irq; -- cgit v1.2.3 From 22a916aaa187946e8df724ab7838a0c13b45a9f4 Mon Sep 17 00:00:00 2001 From: Arun Siluvery Date: Wed, 22 Oct 2014 18:59:52 +0100 Subject: drm/i915: Emit even number of dwords when emitting LRIs The number of DWords should be even when doing ring emits as command sequences require QWord alignment. There was some discussion about the maximum length of the MI_LRI command. Quoting Mika "I did some test with bdw: "The maximum is 128 writes, resulting the 8 bit length field of the command being 0xff, thus following the spec. The 128'th write went through. "Perhaps the max command length is then less in older gens? "Perhaps WARN_ON(x > 128) in MI_LOAD_REGISTER_IMM would be in place but one needs minor tweak to command parser a bit also then. #define I915_MAX_WA_REGS 16 keeps us safe for now atleast." Ville commented that on pre-gen6 the length field seems to be restricted to 0x3f though. So for all cases we should be ok. v2: user LRI variant that can write multiple regs in one go (Damien). We can simply insert one NOP at the end instead of one per register write. Cc: Mika Kuoppala Signed-off-by: Arun Siluvery Reviewed-by: Damien Lespiau [danvet: Add a summary of the MI_LRI length discussion.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c') diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5f935d4dfb6a..603148e6dbc3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -680,15 +680,16 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring) if (ret) return ret; - ret = intel_ring_begin(ring, w->count * 3); + ret = intel_ring_begin(ring, (w->count * 2 + 2)); if (ret) return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); for (i = 0; i < w->count; i++) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(ring, w->reg[i].addr); intel_ring_emit(ring, w->reg[i].value); } + intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); -- cgit v1.2.3