34 files changed, 1594 insertions, 848 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index dda724f04445..3412413408c0 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -3,12 +3,15 @@
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
 subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror
+subdir-ccflags-y += \
+	$(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA)
 
 # Please keep these build lists sorted!
 
 # core driver code
 i915-y := i915_drv.o \
 	  i915_irq.o \
+	  i915_memcpy.o \
 	  i915_params.o \
 	  i915_pci.o \
           i915_suspend.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f62285c1ed7f..c461072da142 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -787,8 +787,6 @@ static void i915_ring_seqno_info(struct seq_file *m,
 
 	seq_printf(m, "Current sequence (%s): %x\n",
 		   engine->name, intel_engine_get_seqno(engine));
-	seq_printf(m, "Current user interrupts (%s): %lx\n",
-		   engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups));
 
 	spin_lock(&b->lock);
 	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
@@ -1434,11 +1432,10 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 			   engine->hangcheck.seqno,
 			   seqno[id],
 			   engine->last_submitted_seqno);
-		seq_printf(m, "\twaiters? %d\n",
-			   intel_engine_has_waiter(engine));
-		seq_printf(m, "\tuser interrupts = %lx [current %lx]\n",
-			   engine->hangcheck.user_interrupts,
-			   READ_ONCE(engine->breadcrumbs.irq_wakeups));
+		seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
+			   yesno(intel_engine_has_waiter(engine)),
+			   yesno(test_bit(engine->id,
+					  &dev_priv->gpu_error.missed_irq_rings)));
 		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
 			   (long long)engine->hangcheck.acthd,
 			   (long long)acthd[id]);
@@ -2547,6 +2544,7 @@ static void i915_guc_client_info(struct seq_file *m,
 				 struct i915_guc_client *client)
 {
 	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	uint64_t tot = 0;
 
 	seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
@@ -2557,15 +2555,14 @@ static void i915_guc_client_info(struct seq_file *m,
 		client->wq_size, client->wq_offset, client->wq_tail);
 
 	seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
-	seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
 	seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
 	seq_printf(m, "\tLast submission result: %d\n", client->retcode);
 
-	for_each_engine(engine, dev_priv) {
+	for_each_engine_id(engine, dev_priv, id) {
+		u64 submissions = client->submissions[id];
+		tot += submissions;
 		seq_printf(m, "\tSubmissions: %llu %s\n",
-				client->submissions[engine->id],
-				engine->name);
-		tot += client->submissions[engine->id];
+				submissions, engine->name);
 	}
 	seq_printf(m, "\tTotal: %llu\n", tot);
 }
@@ -2578,6 +2575,7 @@ static int i915_guc_info(struct seq_file *m, void *data)
 	struct intel_guc guc;
 	struct i915_guc_client client = {};
 	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	u64 total = 0;
 
 	if (!HAS_GUC_SCHED(dev_priv))
@@ -2604,11 +2602,11 @@ static int i915_guc_info(struct seq_file *m, void *data)
 	seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
 
 	seq_printf(m, "\nGuC submissions:\n");
-	for_each_engine(engine, dev_priv) {
+	for_each_engine_id(engine, dev_priv, id) {
+		u64 submissions = guc.submissions[id];
+		total += submissions;
 		seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
-			engine->name, guc.submissions[engine->id],
-			guc.last_seqno[engine->id]);
-		total += guc.submissions[engine->id];
+			engine->name, submissions, guc.last_seqno[id]);
 	}
 	seq_printf(m, "\t%s: %llu\n", "Total", total);
 
@@ -3228,7 +3226,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_engine_cs *engine;
-	int num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
+	int num_rings = INTEL_INFO(dev)->num_rings;
 	enum intel_engine_id id;
 	int j, ret;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 57eb380a2c21..13ae340ef1f3 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -827,6 +827,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	mutex_init(&dev_priv->wm.wm_mutex);
 	mutex_init(&dev_priv->pps_mutex);
 
+	i915_memcpy_init_early(dev_priv);
+
 	ret = i915_workqueues_init(dev_priv);
 	if (ret < 0)
 		return ret;
@@ -1560,6 +1562,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	i915_gem_resume(dev);
 
 	i915_restore_state(dev);
+	intel_pps_unlock_regs_wa(dev_priv);
 	intel_opregion_setup(dev_priv);
 
 	intel_init_pch_refclk(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c36d17659ebe..bf193ba1574e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -793,6 +793,7 @@ struct intel_device_info {
 	u8 gen;
 	u16 gen_mask;
 	u8 ring_mask; /* Rings supported by the HW */
+	u8 num_rings;
 	DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG, SEP_SEMICOLON);
 	/* Register offsets for the various display pipes and transcoders */
 	int pipe_offsets[I915_MAX_TRANSCODERS];
@@ -1061,13 +1062,6 @@ struct intel_gmbus {
 
 struct i915_suspend_saved_registers {
 	u32 saveDSPARB;
-	u32 saveLVDS;
-	u32 savePP_ON_DELAYS;
-	u32 savePP_OFF_DELAYS;
-	u32 savePP_ON;
-	u32 savePP_OFF;
-	u32 savePP_CONTROL;
-	u32 savePP_DIVISOR;
 	u32 saveFBC_CONTROL;
 	u32 saveCACHE_MODE_0;
 	u32 saveMI_ARB_STATE;
@@ -1749,6 +1743,8 @@ struct drm_i915_private {
 
 	uint32_t psr_mmio_base;
 
+	uint32_t pps_mmio_base;
+
 	wait_queue_head_t gmbus_wait_queue;
 
 	struct pci_dev *bridge_dev;
@@ -1840,6 +1836,7 @@ struct drm_i915_private {
 	enum modeset_restore modeset_restore;
 	struct mutex modeset_restore_lock;
 	struct drm_atomic_state *modeset_restore_state;
+	struct drm_modeset_acquire_ctx reset_ctx;
 
 	struct list_head vm_list; /* Global list of all address spaces */
 	struct i915_ggtt ggtt; /* VM representing the global address space */
@@ -3147,13 +3144,20 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 	obj->pages_pin_count--;
 }
 
+enum i915_map_type {
+	I915_MAP_WB = 0,
+	I915_MAP_WC,
+};
+
 /**
  * i915_gem_object_pin_map - return a contiguous mapping of the entire object
  * @obj - the object to map into kernel address space
+ * @type - the type of mapping, used to select pgprot_t
  *
  * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
  * pages and then returns a contiguous mapping of the backing storage into
- * the kernel address space.
+ * the kernel address space. Based on the @type of mapping, the PTE will be
+ * set to either WriteBack or WriteCombine (via pgprot_t).
  *
  * The caller must hold the struct_mutex, and is responsible for calling
  * i915_gem_object_unpin_map() when the mapping is no longer required.
@@ -3161,7 +3165,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
  * Returns the pointer through which to access the mapped object, or an
  * ERR_PTR() on error.
  */
-void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj);
+void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+					   enum i915_map_type type);
 
 /**
  * i915_gem_object_unpin_map - releases an earlier mapping
@@ -3848,7 +3853,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 	 * is woken.
 	 */
 	if (engine->irq_seqno_barrier &&
-	    READ_ONCE(engine->breadcrumbs.irq_seqno_bh) == current &&
+	    rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current &&
 	    cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) {
 		struct task_struct *tsk;
 
@@ -3873,7 +3878,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 		 * irq_posted == false but we are still running).
 		 */
 		rcu_read_lock();
-		tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
+		tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
 		if (tsk && tsk != current)
 			/* Note that if the bottom-half is changed as we
 			 * are sending the wake-up, the new bottom-half will
@@ -3902,4 +3907,16 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
 	return false;
 }
 
+void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
+bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+
+#define ptr_unpack_bits(ptr, bits) ({					\
+	unsigned long __v = (unsigned long)(ptr);			\
+	(bits) = __v & ~PAGE_MASK;					\
+	(typeof(ptr))(__v & PAGE_MASK);					\
+})
+
+#define ptr_pack_bits(ptr, bits)					\
+	((typeof(ptr))((unsigned long)(ptr) | (bits)))
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f4f8eaa90f2a..f48c45080a65 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -279,16 +279,25 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
 	.release = i915_gem_object_release_phys,
 };
 
-int
-i915_gem_object_unbind(struct drm_i915_gem_object *obj)
+int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 	LIST_HEAD(still_in_list);
 	int ret;
 
-	/* The vma will only be freed if it is marked as closed, and if we wait
-	 * upon rendering to the vma, we may unbind anything in the list.
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	/* Closed vma are removed from the obj->vma_list - but they may
+	 * still have an active binding on the object. To remove those we
+	 * must wait for all rendering to complete to the object (as unbinding
+	 * must anyway), and retire the requests.
 	 */
+	ret = i915_gem_object_wait_rendering(obj, false);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests(to_i915(obj->base.dev));
+
 	while ((vma = list_first_entry_or_null(&obj->vma_list,
 					       struct i915_vma,
 					       obj_link))) {
@@ -2077,6 +2086,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 	list_del(&obj->global_list);
 
 	if (obj->mapping) {
+		/* low bits are ignored by is_vmalloc_addr and kmap_to_page */
 		if (is_vmalloc_addr(obj->mapping))
 			vunmap(obj->mapping);
 		else
@@ -2253,7 +2263,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 }
 
 /* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
+				 enum i915_map_type type)
 {
 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
 	struct sg_table *sgt = obj->pages;
@@ -2262,10 +2273,11 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 	struct page *stack_pages[32];
 	struct page **pages = stack_pages;
 	unsigned long i = 0;
+	pgprot_t pgprot;
 	void *addr;
 
 	/* A single page can always be kmapped */
-	if (n_pages == 1)
+	if (n_pages == 1 && type == I915_MAP_WB)
 		return kmap(sg_page(sgt->sgl));
 
 	if (n_pages > ARRAY_SIZE(stack_pages)) {
@@ -2281,7 +2293,15 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 	/* Check that we have the expected number of pages */
 	GEM_BUG_ON(i != n_pages);
 
-	addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
+	switch (type) {
+	case I915_MAP_WB:
+		pgprot = PAGE_KERNEL;
+		break;
+	case I915_MAP_WC:
+		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
+		break;
+	}
+	addr = vmap(pages, n_pages, 0, pgprot);
 
 	if (pages != stack_pages)
 		drm_free_large(pages);
@@ -2290,27 +2310,54 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
 }
 
 /* get, pin, and map the pages of the object into kernel space */
-void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
+void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+			      enum i915_map_type type)
 {
+	enum i915_map_type has_type;
+	bool pinned;
+	void *ptr;
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
 
 	ret = i915_gem_object_get_pages(obj);
 	if (ret)
 		return ERR_PTR(ret);
 
 	i915_gem_object_pin_pages(obj);
+	pinned = obj->pages_pin_count > 1;
 
-	if (!obj->mapping) {
-		obj->mapping = i915_gem_object_map(obj);
-		if (!obj->mapping) {
-			i915_gem_object_unpin_pages(obj);
-			return ERR_PTR(-ENOMEM);
+	ptr = ptr_unpack_bits(obj->mapping, has_type);
+	if (ptr && has_type != type) {
+		if (pinned) {
+			ret = -EBUSY;
+			goto err;
+		}
+
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
+		else
+			kunmap(kmap_to_page(ptr));
+
+		ptr = obj->mapping = NULL;
+	}
+
+	if (!ptr) {
+		ptr = i915_gem_object_map(obj, type);
+		if (!ptr) {
+			ret = -ENOMEM;
+			goto err;
 		}
+
+		obj->mapping = ptr_pack_bits(ptr, type);
 	}
 
-	return obj->mapping;
+	return ptr;
+
+err:
+	i915_gem_object_unpin_pages(obj);
+	return ERR_PTR(ret);
 }
 
 static void
@@ -2423,15 +2470,11 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 	struct drm_i915_gem_request *request;
 	struct intel_ring *ring;
 
-	request = i915_gem_active_peek(&engine->last_request,
-				       &engine->i915->drm.struct_mutex);
-
 	/* Mark all pending requests as complete so that any concurrent
 	 * (lockless) lookup doesn't try and wait upon the request as we
 	 * reset it.
 	 */
-	if (request)
-		intel_engine_init_seqno(engine, request->fence.seqno);
+	intel_engine_init_seqno(engine, engine->last_submitted_seqno);
 
 	/*
 	 * Clear the execlists queue up before freeing the requests, as those
@@ -2453,6 +2496,8 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 	 * implicit references on things like e.g. ppgtt address spaces through
 	 * the request.
 	 */
+	request = i915_gem_active_raw(&engine->last_request,
+				      &engine->i915->drm.struct_mutex);
 	if (request)
 		i915_gem_request_retire_upto(request);
 	GEM_BUG_ON(intel_engine_is_active(engine));
@@ -2526,7 +2571,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
 		container_of(work, typeof(*dev_priv), gt.idle_work.work);
 	struct drm_device *dev = &dev_priv->drm;
 	struct intel_engine_cs *engine;
-	unsigned int stuck_engines;
 	bool rearm_hangcheck;
 
 	if (!READ_ONCE(dev_priv->gt.awake))
@@ -2556,15 +2600,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	dev_priv->gt.awake = false;
 	rearm_hangcheck = false;
 
-	/* As we have disabled hangcheck, we need to unstick any waiters still
-	 * hanging around. However, as we may be racing against the interrupt
-	 * handler or the waiters themselves, we skip enabling the fake-irq.
-	 */
-	stuck_engines = intel_kick_waiters(dev_priv);
-	if (unlikely(stuck_engines))
-		DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n",
-				 stuck_engines);
-
 	if (INTEL_GEN(dev_priv) >= 6)
 		gen6_rps_idle(dev_priv);
 	intel_runtime_pm_put(dev_priv);
@@ -3735,7 +3770,7 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 	i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
 }
 
-static __always_inline unsigned __busy_read_flag(unsigned int id)
+static __always_inline unsigned int __busy_read_flag(unsigned int id)
 {
 	/* Note that we could alias engines in the execbuf API, but
 	 * that would be very unwise as it prevents userspace from
@@ -3750,10 +3785,18 @@ static __always_inline unsigned __busy_read_flag(unsigned int id)
 
 static __always_inline unsigned int __busy_write_id(unsigned int id)
 {
-	return id;
+	/* The uABI guarantees an active writer is also amongst the read
+	 * engines. This would be true if we accessed the activity tracking
+	 * under the lock, but as we perform the lookup of the object and
+	 * its activity locklessly we can not guarantee that the last_write
+	 * being active implies that we have set the same engine flag from
+	 * last_read - hence we always set both read and write busy for
+	 * last_write.
+	 */
+	return id | __busy_read_flag(id);
 }
 
-static __always_inline unsigned
+static __always_inline unsigned int
 __busy_set_if_active(const struct i915_gem_active *active,
 		     unsigned int (*flag)(unsigned int id))
 {
@@ -3770,19 +3813,45 @@ __busy_set_if_active(const struct i915_gem_active *active,
 
 		id = request->engine->exec_id;
 
-		/* Check that the pointer wasn't reassigned and overwritten. */
+		/* Check that the pointer wasn't reassigned and overwritten.
+		 *
+		 * In __i915_gem_active_get_rcu(), we enforce ordering between
+		 * the first rcu pointer dereference (imposing a
+		 * read-dependency only on access through the pointer) and
+		 * the second lockless access through the memory barrier
+		 * following a successful atomic_inc_not_zero(). Here there
+		 * is no such barrier, and so we must manually insert an
+		 * explicit read barrier to ensure that the following
+		 * access occurs after all the loads through the first
+		 * pointer.
+		 *
+		 * It is worth comparing this sequence with
+		 * raw_write_seqcount_latch() which operates very similarly.
+		 * The challenge here is the visibility of the other CPU
+		 * writes to the reallocated request vs the local CPU ordering.
+		 * Before the other CPU can overwrite the request, it will
+		 * have updated our active->request and gone through a wmb.
+		 * During the read here, we want to make sure that the values
+		 * we see have not been overwritten as we do so - and we do
+		 * that by serialising the second pointer check with the writes
+		 * on other other CPUs.
+		 *
+		 * The corresponding write barrier is part of
+		 * rcu_assign_pointer().
+		 */
+		smp_rmb();
 		if (request == rcu_access_pointer(active->request))
 			return flag(id);
 	} while (1);
 }
 
-static inline unsigned
+static __always_inline unsigned int
 busy_check_reader(const struct i915_gem_active *active)
 {
 	return __busy_set_if_active(active, __busy_read_flag);
 }
 
-static inline unsigned
+static __always_inline unsigned int
 busy_check_writer(const struct i915_gem_active *active)
 {
 	return __busy_set_if_active(active, __busy_write_id);
@@ -3833,9 +3902,11 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 			args->busy |= busy_check_reader(&obj->last_read[idx]);
 
 		/* For ABI sanity, we only care that the write engine is in
-		 * the set of read engines. This is ensured by the ordering
-		 * of setting last_read/last_write in i915_vma_move_to_active,
-		 * and then in reverse in retire.
+		 * the set of read engines. This should be ensured by the
+		 * ordering of setting last_read/last_write in
+		 * i915_vma_move_to_active(), and then in reverse in retire.
+		 * However, for good measure, we always report the last_write
+		 * request as a busy read as well as being a busy write.
 		 *
 		 * We don't care that the set of active read/write engines
 		 * may change during construction of the result, as it is
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index bb72af5320b0..547caf26a6b9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -568,7 +568,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 	const int num_rings =
 		/* Use an extended w/a on ivb+ if signalling from other rings */
 		i915.semaphores ?
-		hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 :
+		INTEL_INFO(dev_priv)->num_rings - 1 :
 		0;
 	int len, ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index c60a8d5bbad0..10265bb35604 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -119,7 +119,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 	if (ret)
 		return ERR_PTR(ret);
 
-	addr = i915_gem_object_pin_map(obj);
+	addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 	mutex_unlock(&dev->struct_mutex);
 
 	return addr;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c494b79ded20..c8d13fea4b25 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1702,6 +1702,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto err_batch_unpin;
 	}
 
+	/* Whilst this request exists, batch_obj will be on the
+	 * active_list, and so will hold the active reference. Only when this
+	 * request is retired will the the batch_obj be moved onto the
+	 * inactive_list and lose its active reference. Hence we do not need
+	 * to explicitly hold another reference here.
+	 */
+	params->request->batch_obj = params->batch->obj;
+
 	ret = i915_gem_request_add_to_client(params->request, file);
 	if (ret)
 		goto err_request;
@@ -1720,7 +1728,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
-	__i915_add_request(params->request, params->batch->obj, ret == 0);
+	__i915_add_request(params->request, ret == 0);
 
 err_batch_unpin:
 	/*
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 18c7c9644761..d876501694c6 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3449,18 +3449,16 @@ rotate_pages(const dma_addr_t *in, unsigned int offset,
 }
 
 static struct sg_table *
-intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
+intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
 			  struct drm_i915_gem_object *obj)
 {
 	const size_t n_pages = obj->base.size / PAGE_SIZE;
-	unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height;
-	unsigned int size_pages_uv;
+	unsigned int size = intel_rotation_info_size(rot_info);
 	struct sgt_iter sgt_iter;
 	dma_addr_t dma_addr;
 	unsigned long i;
 	dma_addr_t *page_addr_list;
 	struct sg_table *st;
-	unsigned int uv_start_page;
 	struct scatterlist *sg;
 	int ret = -ENOMEM;
 
@@ -3471,18 +3469,12 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
 	if (!page_addr_list)
 		return ERR_PTR(ret);
 
-	/* Account for UV plane with NV12. */
-	if (rot_info->pixel_format == DRM_FORMAT_NV12)
-		size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height;
-	else
-		size_pages_uv = 0;
-
 	/* Allocate target SG list. */
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (!st)
 		goto err_st_alloc;
 
-	ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
+	ret = sg_alloc_table(st, size, GFP_KERNEL);
 	if (ret)
 		goto err_sg_alloc;
 
@@ -3495,32 +3487,14 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
 	st->nents = 0;
 	sg = st->sgl;
 
-	/* Rotate the pages. */
-	sg = rotate_pages(page_addr_list, 0,
-			  rot_info->plane[0].width, rot_info->plane[0].height,
-			  rot_info->plane[0].width,
-			  st, sg);
-
-	/* Append the UV plane if NV12. */
-	if (rot_info->pixel_format == DRM_FORMAT_NV12) {
-		uv_start_page = size_pages;
-
-		/* Check for tile-row un-alignment. */
-		if (offset_in_page(rot_info->uv_offset))
-			uv_start_page--;
-
-		rot_info->uv_start_page = uv_start_page;
-
-		sg = rotate_pages(page_addr_list, rot_info->uv_start_page,
-				  rot_info->plane[1].width, rot_info->plane[1].height,
-				  rot_info->plane[1].width,
-				  st, sg);
+	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
+		sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
+				  rot_info->plane[i].width, rot_info->plane[i].height,
+				  rot_info->plane[i].stride, st, sg);
 	}
 
-	DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n",
-		      obj->base.size, rot_info->plane[0].width,
-		      rot_info->plane[0].height, size_pages + size_pages_uv,
-		      size_pages);
+	DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n",
+		      obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
 
 	drm_free_large(page_addr_list);
 
@@ -3531,10 +3505,9 @@ err_sg_alloc:
 err_st_alloc:
 	drm_free_large(page_addr_list);
 
-	DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%d) (%ux%u tiles, %u pages (%u plane 0))\n",
-		      obj->base.size, ret, rot_info->plane[0].width,
-		      rot_info->plane[0].height, size_pages + size_pages_uv,
-		      size_pages);
+	DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+		      obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
+
 	return ERR_PTR(ret);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index cc56206a1600..56e64a5355e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -139,12 +139,9 @@ enum i915_ggtt_view_type {
 };
 
 struct intel_rotation_info {
-	unsigned int uv_offset;
-	uint32_t pixel_format;
-	unsigned int uv_start_page;
 	struct {
 		/* tiles */
-		unsigned int width, height;
+		unsigned int width, height, stride, offset;
 	} plane[2];
 };
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 6a1661643d3d..b764c1d440c8 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -355,7 +355,35 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (req && i915_gem_request_completed(req))
 		i915_gem_request_retire(req);
 
-	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
+	/* Beware: Dragons be flying overhead.
+	 *
+	 * We use RCU to look up requests in flight. The lookups may
+	 * race with the request being allocated from the slab freelist.
+	 * That is the request we are writing to here, may be in the process
+	 * of being read by __i915_gem_active_get_rcu(). As such,
+	 * we have to be very careful when overwriting the contents. During
+	 * the RCU lookup, we change chase the request->engine pointer,
+	 * read the request->fence.seqno and increment the reference count.
+	 *
+	 * The reference count is incremented atomically. If it is zero,
+	 * the lookup knows the request is unallocated and complete. Otherwise,
+	 * it is either still in use, or has been reallocated and reset
+	 * with fence_init(). This increment is safe for release as we check
+	 * that the request we have a reference to and matches the active
+	 * request.
+	 *
+	 * Before we increment the refcount, we chase the request->engine
+	 * pointer. We must not call kmem_cache_zalloc() or else we set
+	 * that pointer to NULL and cause a crash during the lookup. If
+	 * we see the request is completed (based on the value of the
+	 * old engine and seqno), the lookup is complete and reports NULL.
+	 * If we decide the request is not completed (new engine or seqno),
+	 * then we grab a reference and double check that it is still the
+	 * active request - which it won't be and restart the lookup.
+	 *
+	 * Do not use kmem_cache_zalloc() here!
+	 */
+	req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
 	if (!req)
 		return ERR_PTR(-ENOMEM);
 
@@ -375,6 +403,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	req->engine = engine;
 	req->ctx = i915_gem_context_get(ctx);
 
+	/* No zalloc, must clear what we need by hand */
+	req->previous_context = NULL;
+	req->file_priv = NULL;
+	req->batch_obj = NULL;
+	req->pid = NULL;
+	req->elsp_submitted = 0;
+
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
 	 * eventually emit this request. This is to guarantee that the
@@ -426,9 +461,7 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
  * request is not being tracked for completion but the work itself is
  * going to happen on the hardware. This would be a Bad Thing(tm).
  */
-void __i915_add_request(struct drm_i915_gem_request *request,
-			struct drm_i915_gem_object *obj,
-			bool flush_caches)
+void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 {
 	struct intel_engine_cs *engine;
 	struct intel_ring *ring;
@@ -469,14 +502,6 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 
 	request->head = request_start;
 
-	/* Whilst this request exists, batch_obj will be on the
-	 * active_list, and so will hold the active reference. Only when this
-	 * request is retired will the the batch_obj be moved onto the
-	 * inactive_list and lose its active reference. Hence we do not need
-	 * to explicitly hold another reference here.
-	 */
-	request->batch_obj = obj;
-
 	/* Seal the request and mark it as pending execution. Note that
 	 * we may inspect this state, without holding any locks, during
 	 * hangcheck. Hence we apply the barrier to ensure that we do not
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 3496e28785e7..85393be09c27 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -51,6 +51,13 @@ struct intel_signal_node {
  * emission time to be associated with the request for tracking how far ahead
  * of the GPU the submission is.
  *
+ * When modifying this structure be very aware that we perform a lockless
+ * RCU lookup of it that may race against reallocation of the struct
+ * from the slab freelist. We intentionally do not zero the structure on
+ * allocation so that the lookup can use the dangling pointers (and is
+ * cogniscent that those pointers may be wrong). Instead, everything that
+ * needs to be initialised must be done so explicitly.
+ *
  * The requests are reference counted.
  */
 struct drm_i915_gem_request {
@@ -218,13 +225,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 	*pdst = src;
 }
 
-void __i915_add_request(struct drm_i915_gem_request *req,
-			struct drm_i915_gem_object *batch_obj,
-			bool flush_caches);
+void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
-	__i915_add_request(req, NULL, true)
+	__i915_add_request(req, true)
 #define i915_add_request_no_flush(req) \
-	__i915_add_request(req, NULL, false)
+	__i915_add_request(req, false)
 
 struct intel_rps_client;
 #define NO_WAITBOOST ERR_PTR(-1)
@@ -360,41 +365,34 @@ __i915_gem_active_peek(const struct i915_gem_active *active)
 }
 
 /**
- * i915_gem_active_peek - report the active request being monitored
+ * i915_gem_active_raw - return the active request
  * @active - the active tracker
  *
- * i915_gem_active_peek() returns the current request being tracked if
- * still active, or NULL. It does not obtain a reference on the request
- * for the caller, so the caller must hold struct_mutex.
+ * i915_gem_active_raw() returns the current request being tracked, or NULL.
+ * It does not obtain a reference on the request for the caller, so the caller
+ * must hold struct_mutex.
  */
 static inline struct drm_i915_gem_request *
-i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
+i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
 {
-	struct drm_i915_gem_request *request;
-
-	request = rcu_dereference_protected(active->request,
-					    lockdep_is_held(mutex));
-	if (!request || i915_gem_request_completed(request))
-		return NULL;
-
-	return request;
+	return rcu_dereference_protected(active->request,
+					 lockdep_is_held(mutex));
 }
 
 /**
- * i915_gem_active_peek_rcu - report the active request being monitored
+ * i915_gem_active_peek - report the active request being monitored
  * @active - the active tracker
  *
- * i915_gem_active_peek_rcu() returns the current request being tracked if
+ * i915_gem_active_peek() returns the current request being tracked if
  * still active, or NULL. It does not obtain a reference on the request
- * for the caller, and inspection of the request is only valid under
- * the RCU lock.
+ * for the caller, so the caller must hold struct_mutex.
  */
 static inline struct drm_i915_gem_request *
-i915_gem_active_peek_rcu(const struct i915_gem_active *active)
+i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
 {
 	struct drm_i915_gem_request *request;
 
-	request = rcu_dereference(active->request);
+	request = i915_gem_active_raw(active, mutex);
 	if (!request || i915_gem_request_completed(request))
 		return NULL;
 
@@ -465,6 +463,10 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
 	 * just report the active tracker is idle. If the new request is
 	 * incomplete, then we acquire a reference on it and check that
 	 * it remained the active request.
+	 *
+	 * It is then imperative that we do not zero the request on
+	 * reallocation, so that we can chase the dangling pointers!
+	 * See i915_gem_request_alloc().
 	 */
 	do {
 		struct drm_i915_gem_request *request;
@@ -497,6 +499,9 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
 		 * incremented) then the following read for rcu_access_pointer()
 		 * must occur after the atomic operation and so confirm
 		 * that this request is the one currently being tracked.
+		 *
+		 * The corresponding write barrier is part of
+		 * rcu_assign_pointer().
 		 */
 		if (!request || request == rcu_access_pointer(active->request))
 			return rcu_pointer_handoff(request);
@@ -635,8 +640,7 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	struct drm_i915_gem_request *request;
 	int ret;
 
-	request = rcu_dereference_protected(active->request,
-					    lockdep_is_held(mutex));
+	request = i915_gem_active_raw(active, mutex);
 	if (!request)
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 13279610eeec..f7633585ea38 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -115,17 +115,28 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
 
 		base = bsm & INTEL_BSM_MASK;
 	} else if (IS_I865G(dev)) {
+		u32 tseg_size = 0;
 		u16 toud = 0;
+		u8 tmp;
+
+		pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0),
+					 I845_ESMRAMC, &tmp);
+
+		if (tmp & TSEG_ENABLE) {
+			switch (tmp & I845_TSEG_SIZE_MASK) {
+			case I845_TSEG_SIZE_512K:
+				tseg_size = KB(512);
+				break;
+			case I845_TSEG_SIZE_1M:
+				tseg_size = MB(1);
+				break;
+			}
+		}
 
-		/*
-		 * FIXME is the graphics stolen memory region
-		 * always at TOUD? Ie. is it always the last
-		 * one to be allocated by the BIOS?
-		 */
 		pci_bus_read_config_word(dev->pdev->bus, PCI_DEVFN(0, 0),
 					 I865_TOUD, &toud);
 
-		base = toud << 16;
+		base = (toud << 16) + tseg_size;
 	} else if (IS_I85X(dev)) {
 		u32 tseg_size = 0;
 		u32 tom;
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 03a5cef353eb..6831321a9c8c 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -340,9 +340,10 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 	desc.priority = client->priority;
 	desc.db_id = client->doorbell_id;
 
-	for_each_engine(engine, dev_priv) {
+	for_each_engine_masked(engine, dev_priv, client->engines) {
 		struct intel_context *ce = &ctx->engine[engine->id];
-		struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id];
+		uint32_t guc_engine_id = engine->guc_id;
+		struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
 		struct drm_i915_gem_object *obj;
 
 		/* TODO: We have a design issue to be solved here. Only when we
@@ -361,7 +362,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 		gfx_addr = i915_gem_obj_ggtt_offset(ce->state);
 		lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
 		lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
-				(engine->guc_id << GUC_ELC_ENGINE_OFFSET);
+				(guc_engine_id << GUC_ELC_ENGINE_OFFSET);
 
 		obj = ce->ring->obj;
 		gfx_addr = i915_gem_obj_ggtt_offset(obj);
@@ -371,9 +372,11 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 		lrc->ring_next_free_location = gfx_addr;
 		lrc->ring_current_tail_pointer_value = 0;
 
-		desc.engines_used |= (1 << engine->guc_id);
+		desc.engines_used |= (1 << guc_engine_id);
 	}
 
+	DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
+			client->engines, desc.engines_used);
 	WARN_ON(desc.engines_used == 0);
 
 	/*
@@ -457,6 +460,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
 	/* wqi_len is in DWords, and does not include the one-word header */
 	const size_t wqi_size = sizeof(struct guc_wq_item);
 	const u32 wqi_len = wqi_size/sizeof(u32) - 1;
+	struct intel_engine_cs *engine = rq->engine;
 	struct guc_process_desc *desc;
 	struct guc_wq_item *wqi;
 	void *base;
@@ -498,12 +502,11 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
 	/* Now fill in the 4-word work queue item */
 	wqi->header = WQ_TYPE_INORDER |
 			(wqi_len << WQ_LEN_SHIFT) |
-			(rq->engine->guc_id << WQ_TARGET_SHIFT) |
+			(engine->guc_id << WQ_TARGET_SHIFT) |
 			WQ_NO_WCFLUSH_WAIT;
 
 	/* The GuC wants only the low-order word of the context descriptor */
-	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx,
-							     rq->engine);
+	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
 
 	wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
 	wqi->fence_id = rq->fence.seqno;
@@ -696,29 +699,47 @@ guc_client_free(struct drm_i915_private *dev_priv,
 	kfree(client);
 }
 
+/* Check that a doorbell register is in the expected state */
+static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	i915_reg_t drbreg = GEN8_DRBREGL(db_id);
+	uint32_t value = I915_READ(drbreg);
+	bool enabled = (value & GUC_DOORBELL_ENABLED) != 0;
+	bool expected = test_bit(db_id, guc->doorbell_bitmap);
+
+	if (enabled == expected)
+		return true;
+
+	DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n",
+			 db_id, drbreg.reg, value,
+			 expected ? "active" : "inactive");
+
+	return false;
+}
+
 /*
- * Borrow the first client to set up & tear down every doorbell
+ * Borrow the first client to set up & tear down each unused doorbell
  * in turn, to ensure that all doorbell h/w is (re)initialised.
  */
 static void guc_init_doorbell_hw(struct intel_guc *guc)
 {
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct i915_guc_client *client = guc->execbuf_client;
-	uint16_t db_id, i;
-	int err;
+	uint16_t db_id;
+	int i, err;
 
+	/* Save client's original doorbell selection */
 	db_id = client->doorbell_id;
 
 	for (i = 0; i < GUC_MAX_DOORBELLS; ++i) {
-		i915_reg_t drbreg = GEN8_DRBREGL(i);
-		u32 value = I915_READ(drbreg);
+		/* Skip if doorbell is OK */
+		if (guc_doorbell_check(guc, i))
+			continue;
 
 		err = guc_update_doorbell_id(guc, client, i);
-
-		/* Report update failure or unexpectedly active doorbell */
-		if (err || (i != db_id && (value & GUC_DOORBELL_ENABLED)))
-			DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) was 0x%x, err %d\n",
-					  i, drbreg.reg, value, err);
+		if (err)
+			DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n",
+					i, err);
 	}
 
 	/* Restore to original value */
@@ -727,15 +748,9 @@ static void guc_init_doorbell_hw(struct intel_guc *guc)
 		DRM_ERROR("Failed to restore doorbell to %d, err %d\n",
 			db_id, err);
 
-	for (i = 0; i < GUC_MAX_DOORBELLS; ++i) {
-		i915_reg_t drbreg = GEN8_DRBREGL(i);
-		u32 value = I915_READ(drbreg);
-
-		if (i != db_id && (value & GUC_DOORBELL_ENABLED))
-			DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) finally 0x%x\n",
-					  i, drbreg.reg, value);
-
-	}
+	/* Read back & verify all doorbell registers */
+	for (i = 0; i < GUC_MAX_DOORBELLS; ++i)
+		(void)guc_doorbell_check(guc, i);
 }
 
 /**
@@ -752,6 +767,7 @@ static void guc_init_doorbell_hw(struct intel_guc *guc)
  */
 static struct i915_guc_client *
 guc_client_alloc(struct drm_i915_private *dev_priv,
+		 uint32_t engines,
 		 uint32_t priority,
 		 struct i915_gem_context *ctx)
 {
@@ -764,10 +780,11 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
 	if (!client)
 		return NULL;
 
-	client->doorbell_id = GUC_INVALID_DOORBELL_ID;
-	client->priority = priority;
 	client->owner = ctx;
 	client->guc = guc;
+	client->engines = engines;
+	client->priority = priority;
+	client->doorbell_id = GUC_INVALID_DOORBELL_ID;
 
 	client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0,
 			GUC_MAX_GPU_CONTEXTS, GFP_KERNEL);
@@ -809,8 +826,8 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
 	if (guc_init_doorbell(guc, client, db_id))
 		goto err;
 
-	DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u\n",
-		priority, client, client->ctx_index);
+	DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n",
+		priority, client, client->engines, client->ctx_index);
 	DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n",
 		client->doorbell_id, client->doorbell_offset);
 
@@ -994,6 +1011,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 
 	/* client for execbuf submission */
 	client = guc_client_alloc(dev_priv,
+				  INTEL_INFO(dev_priv)->ring_mask,
 				  GUC_CTX_PRIORITY_KMD_NORMAL,
 				  dev_priv->kernel_context);
 	if (!client) {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 591f452ece68..ebb83d5a448b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -972,10 +972,8 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 static void notify_ring(struct intel_engine_cs *engine)
 {
 	smp_store_mb(engine->breadcrumbs.irq_posted, true);
-	if (intel_engine_wakeup(engine)) {
+	if (intel_engine_wakeup(engine))
 		trace_i915_gem_request_notify(engine);
-		engine->breadcrumbs.irq_wakeups++;
-	}
 }
 
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
@@ -3044,22 +3042,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 	return HANGCHECK_HUNG;
 }
 
-static unsigned long kick_waiters(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-	unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_wakeups);
-
-	if (engine->hangcheck.user_interrupts == irq_count &&
-	    !test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
-		if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
-			DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
-				  engine->name);
-
-		intel_engine_enable_fake_irq(engine);
-	}
-
-	return irq_count;
-}
 /*
  * This is called when the chip hasn't reported back with completed
  * batchbuffers in a long time. We keep track per ring seqno progress and
@@ -3097,7 +3079,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 		bool busy = intel_engine_has_waiter(engine);
 		u64 acthd;
 		u32 seqno;
-		unsigned user_interrupts;
 
 		semaphore_clear_deadlocks(dev_priv);
 
@@ -3114,15 +3095,11 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 		acthd = intel_engine_get_active_head(engine);
 		seqno = intel_engine_get_seqno(engine);
 
-		/* Reset stuck interrupts between batch advances */
-		user_interrupts = 0;
-
 		if (engine->hangcheck.seqno == seqno) {
 			if (!intel_engine_is_active(engine)) {
 				engine->hangcheck.action = HANGCHECK_IDLE;
 				if (busy) {
 					/* Safeguard against driver failure */
-					user_interrupts = kick_waiters(engine);
 					engine->hangcheck.score += BUSY;
 				}
 			} else {
@@ -3185,7 +3162,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
 		engine->hangcheck.seqno = seqno;
 		engine->hangcheck.acthd = acthd;
-		engine->hangcheck.user_interrupts = user_interrupts;
 		busy_count += busy;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c
new file mode 100644
index 000000000000..6f1df0ec8a81
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_memcpy.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <asm/fpu/api.h>
+
+#include "i915_drv.h"
+
+DEFINE_STATIC_KEY_FALSE(has_movntdqa);
+
+#ifdef CONFIG_AS_MOVNTDQA
+static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
+{
+	kernel_fpu_begin();
+
+	len >>= 4;
+	while (len >= 4) {
+		asm("movntdqa   (%0), %%xmm0\n"
+		    "movntdqa 16(%0), %%xmm1\n"
+		    "movntdqa 32(%0), %%xmm2\n"
+		    "movntdqa 48(%0), %%xmm3\n"
+		    "movaps %%xmm0,   (%1)\n"
+		    "movaps %%xmm1, 16(%1)\n"
+		    "movaps %%xmm2, 32(%1)\n"
+		    "movaps %%xmm3, 48(%1)\n"
+		    :: "r" (src), "r" (dst) : "memory");
+		src += 64;
+		dst += 64;
+		len -= 4;
+	}
+	while (len--) {
+		asm("movntdqa (%0), %%xmm0\n"
+		    "movaps %%xmm0, (%1)\n"
+		    :: "r" (src), "r" (dst) : "memory");
+		src += 16;
+		dst += 16;
+	}
+
+	kernel_fpu_end();
+}
+#endif
+
+/**
+ * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
+ * @dst: destination pointer
+ * @src: source pointer
+ * @len: how many bytes to copy
+ *
+ * i915_memcpy_from_wc copies @len bytes from @src to @dst using
+ * non-temporal instructions where available. Note that all arguments
+ * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
+ * of 16.
+ *
+ * To test whether accelerated reads from WC are supported, use
+ * i915_memcpy_from_wc(NULL, NULL, 0);
+ *
+ * Returns true if the copy was successful, false if the preconditions
+ * are not met.
+ */
+bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+{
+	if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
+		return false;
+
+#ifdef CONFIG_AS_MOVNTDQA
+	if (static_branch_likely(&has_movntdqa)) {
+		if (likely(len))
+			__memcpy_ntdqa(dst, src, len);
+		return true;
+	}
+#endif
+
+	return false;
+}
+
+void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
+{
+	if (static_cpu_has(X86_FEATURE_XMM4_1))
+		static_branch_enable(&has_movntdqa);
+}
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index b6e404c91eed..768ad89d9cd4 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -45,6 +45,7 @@ struct i915_params i915 __read_mostly = {
 	.fastboot = 0,
 	.prefault_disable = 0,
 	.load_detect_test = 0,
+	.force_reset_modeset_test = 0,
 	.reset = true,
 	.invert_brightness = 0,
 	.disable_display = 0,
@@ -161,6 +162,11 @@ MODULE_PARM_DESC(load_detect_test,
 	"Force-enable the VGA load detect code for testing (default:false). "
 	"For developers only.");
 
+module_param_named_unsafe(force_reset_modeset_test, i915.force_reset_modeset_test, bool, 0600);
+MODULE_PARM_DESC(force_reset_modeset_test,
+	"Force a modeset during gpu reset for testing (default:false). "
+	"For developers only.");
+
 module_param_named_unsafe(invert_brightness, i915.invert_brightness, int, 0600);
 MODULE_PARM_DESC(invert_brightness,
 	"Invert backlight brightness "
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 0ad020b4a925..3a0dd78ddb38 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -57,6 +57,7 @@ struct i915_params {
 	bool fastboot;
 	bool prefault_disable;
 	bool load_detect_test;
+	bool force_reset_modeset_test;
 	bool reset;
 	bool disable_display;
 	bool verbose_state_checks;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f38a5e20bbee..da82744f3a3b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3660,8 +3660,17 @@ enum {
 #define   VIDEO_DIP_ENABLE_SPD_HSW	(1 << 0)
 
 /* Panel power sequencing */
-#define PP_STATUS	_MMIO(0x61200)
-#define   PP_ON		(1 << 31)
+#define PPS_BASE			0x61200
+#define VLV_PPS_BASE			(VLV_DISPLAY_BASE + PPS_BASE)
+#define PCH_PPS_BASE			0xC7200
+
+#define _MMIO_PPS(pps_idx, reg)		_MMIO(dev_priv->pps_mmio_base -	\
+					      PPS_BASE + (reg) +	\
+					      (pps_idx) * 0x100)
+
+#define _PP_STATUS			0x61200
+#define PP_STATUS(pps_idx)		_MMIO_PPS(pps_idx, _PP_STATUS)
+#define   PP_ON				(1 << 31)
 /*
  * Indicates that all dependencies of the panel are on:
  *
@@ -3669,14 +3678,14 @@ enum {
  * - pipe enabled
  * - LVDS/DVOB/DVOC on
  */
-#define   PP_READY		(1 << 30)
-#define   PP_SEQUENCE_NONE	(0 << 28)
-#define   PP_SEQUENCE_POWER_UP	(1 << 28)
-#define   PP_SEQUENCE_POWER_DOWN (2 << 28)
-#define   PP_SEQUENCE_MASK	(3 << 28)
-#define   PP_SEQUENCE_SHIFT	28
-#define   PP_CYCLE_DELAY_ACTIVE	(1 << 27)
-#define   PP_SEQUENCE_STATE_MASK 0x0000000f
+#define   PP_READY			(1 << 30)
+#define   PP_SEQUENCE_NONE		(0 << 28)
+#define   PP_SEQUENCE_POWER_UP		(1 << 28)
+#define   PP_SEQUENCE_POWER_DOWN	(2 << 28)
+#define   PP_SEQUENCE_MASK		(3 << 28)
+#define   PP_SEQUENCE_SHIFT		28
+#define   PP_CYCLE_DELAY_ACTIVE		(1 << 27)
+#define   PP_SEQUENCE_STATE_MASK	0x0000000f
 #define   PP_SEQUENCE_STATE_OFF_IDLE	(0x0 << 0)
 #define   PP_SEQUENCE_STATE_OFF_S0_1	(0x1 << 0)
 #define   PP_SEQUENCE_STATE_OFF_S0_2	(0x2 << 0)
@@ -3686,11 +3695,46 @@ enum {
 #define   PP_SEQUENCE_STATE_ON_S1_2	(0xa << 0)
 #define   PP_SEQUENCE_STATE_ON_S1_3	(0xb << 0)
 #define   PP_SEQUENCE_STATE_RESET	(0xf << 0)
-#define PP_CONTROL	_MMIO(0x61204)
-#define   POWER_TARGET_ON	(1 << 0)
-#define PP_ON_DELAYS	_MMIO(0x61208)
-#define PP_OFF_DELAYS	_MMIO(0x6120c)
-#define PP_DIVISOR	_MMIO(0x61210)
+
+#define _PP_CONTROL			0x61204
+#define PP_CONTROL(pps_idx)		_MMIO_PPS(pps_idx, _PP_CONTROL)
+#define  PANEL_UNLOCK_REGS		(0xabcd << 16)
+#define  PANEL_UNLOCK_MASK		(0xffff << 16)
+#define  BXT_POWER_CYCLE_DELAY_MASK	0x1f0
+#define  BXT_POWER_CYCLE_DELAY_SHIFT	4
+#define  EDP_FORCE_VDD			(1 << 3)
+#define  EDP_BLC_ENABLE			(1 << 2)
+#define  PANEL_POWER_RESET		(1 << 1)
+#define  PANEL_POWER_OFF		(0 << 0)
+#define  PANEL_POWER_ON			(1 << 0)
+
+#define _PP_ON_DELAYS			0x61208
+#define PP_ON_DELAYS(pps_idx)		_MMIO_PPS(pps_idx, _PP_ON_DELAYS)
+#define  PANEL_PORT_SELECT_SHIFT	30
+#define  PANEL_PORT_SELECT_MASK		(3 << 30)
+#define  PANEL_PORT_SELECT_LVDS		(0 << 30)
+#define  PANEL_PORT_SELECT_DPA		(1 << 30)
+#define  PANEL_PORT_SELECT_DPC		(2 << 30)
+#define  PANEL_PORT_SELECT_DPD		(3 << 30)
+#define  PANEL_PORT_SELECT_VLV(port)	((port) << 30)
+#define  PANEL_POWER_UP_DELAY_MASK	0x1fff0000
+#define  PANEL_POWER_UP_DELAY_SHIFT	16
+#define  PANEL_LIGHT_ON_DELAY_MASK	0x1fff
+#define  PANEL_LIGHT_ON_DELAY_SHIFT	0
+
+#define _PP_OFF_DELAYS			0x6120C
+#define PP_OFF_DELAYS(pps_idx)		_MMIO_PPS(pps_idx, _PP_OFF_DELAYS)
+#define  PANEL_POWER_DOWN_DELAY_MASK	0x1fff0000
+#define  PANEL_POWER_DOWN_DELAY_SHIFT	16
+#define  PANEL_LIGHT_OFF_DELAY_MASK	0x1fff
+#define  PANEL_LIGHT_OFF_DELAY_SHIFT	0
+
+#define _PP_DIVISOR			0x61210
+#define PP_DIVISOR(pps_idx)		_MMIO_PPS(pps_idx, _PP_DIVISOR)
+#define  PP_REFERENCE_DIVIDER_MASK	0xffffff00
+#define  PP_REFERENCE_DIVIDER_SHIFT	8
+#define  PANEL_POWER_CYCLE_DELAY_MASK	0x1f
+#define  PANEL_POWER_CYCLE_DELAY_SHIFT	0
 
 /* Panel fitting */
 #define PFIT_CONTROL	_MMIO(dev_priv->info.display_mmio_offset + 0x61230)
@@ -6750,77 +6794,6 @@ enum {
 #define PCH_LVDS	_MMIO(0xe1180)
 #define  LVDS_DETECTED	(1 << 1)
 
-/* vlv has 2 sets of panel control regs. */
-#define _PIPEA_PP_STATUS         (VLV_DISPLAY_BASE + 0x61200)
-#define _PIPEA_PP_CONTROL        (VLV_DISPLAY_BASE + 0x61204)
-#define _PIPEA_PP_ON_DELAYS      (VLV_DISPLAY_BASE + 0x61208)
-#define  PANEL_PORT_SELECT_VLV(port)	((port) << 30)
-#define _PIPEA_PP_OFF_DELAYS     (VLV_DISPLAY_BASE + 0x6120c)
-#define _PIPEA_PP_DIVISOR        (VLV_DISPLAY_BASE + 0x61210)
-
-#define _PIPEB_PP_STATUS         (VLV_DISPLAY_BASE + 0x61300)
-#define _PIPEB_PP_CONTROL        (VLV_DISPLAY_BASE + 0x61304)
-#define _PIPEB_PP_ON_DELAYS      (VLV_DISPLAY_BASE + 0x61308)
-#define _PIPEB_PP_OFF_DELAYS     (VLV_DISPLAY_BASE + 0x6130c)
-#define _PIPEB_PP_DIVISOR        (VLV_DISPLAY_BASE + 0x61310)
-
-#define VLV_PIPE_PP_STATUS(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_STATUS, _PIPEB_PP_STATUS)
-#define VLV_PIPE_PP_CONTROL(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_CONTROL, _PIPEB_PP_CONTROL)
-#define VLV_PIPE_PP_ON_DELAYS(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_ON_DELAYS, _PIPEB_PP_ON_DELAYS)
-#define VLV_PIPE_PP_OFF_DELAYS(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_OFF_DELAYS, _PIPEB_PP_OFF_DELAYS)
-#define VLV_PIPE_PP_DIVISOR(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_DIVISOR, _PIPEB_PP_DIVISOR)
-
-#define _PCH_PP_STATUS		0xc7200
-#define _PCH_PP_CONTROL		0xc7204
-#define  PANEL_UNLOCK_REGS	(0xabcd << 16)
-#define  PANEL_UNLOCK_MASK	(0xffff << 16)
-#define  BXT_POWER_CYCLE_DELAY_MASK	(0x1f0)
-#define  BXT_POWER_CYCLE_DELAY_SHIFT	4
-#define  EDP_FORCE_VDD		(1 << 3)
-#define  EDP_BLC_ENABLE		(1 << 2)
-#define  PANEL_POWER_RESET	(1 << 1)
-#define  PANEL_POWER_OFF	(0 << 0)
-#define  PANEL_POWER_ON		(1 << 0)
-#define _PCH_PP_ON_DELAYS	0xc7208
-#define  PANEL_PORT_SELECT_MASK	(3 << 30)
-#define  PANEL_PORT_SELECT_LVDS	(0 << 30)
-#define  PANEL_PORT_SELECT_DPA	(1 << 30)
-#define  PANEL_PORT_SELECT_DPC	(2 << 30)
-#define  PANEL_PORT_SELECT_DPD	(3 << 30)
-#define  PANEL_POWER_UP_DELAY_MASK	(0x1fff0000)
-#define  PANEL_POWER_UP_DELAY_SHIFT	16
-#define  PANEL_LIGHT_ON_DELAY_MASK	(0x1fff)
-#define  PANEL_LIGHT_ON_DELAY_SHIFT	0
-
-#define _PCH_PP_OFF_DELAYS		0xc720c
-#define  PANEL_POWER_DOWN_DELAY_MASK	(0x1fff0000)
-#define  PANEL_POWER_DOWN_DELAY_SHIFT	16
-#define  PANEL_LIGHT_OFF_DELAY_MASK	(0x1fff)
-#define  PANEL_LIGHT_OFF_DELAY_SHIFT	0
-
-#define _PCH_PP_DIVISOR			0xc7210
-#define  PP_REFERENCE_DIVIDER_MASK	(0xffffff00)
-#define  PP_REFERENCE_DIVIDER_SHIFT	8
-#define  PANEL_POWER_CYCLE_DELAY_MASK	(0x1f)
-#define  PANEL_POWER_CYCLE_DELAY_SHIFT	0
-
-#define PCH_PP_STATUS			_MMIO(_PCH_PP_STATUS)
-#define PCH_PP_CONTROL			_MMIO(_PCH_PP_CONTROL)
-#define PCH_PP_ON_DELAYS		_MMIO(_PCH_PP_ON_DELAYS)
-#define PCH_PP_OFF_DELAYS		_MMIO(_PCH_PP_OFF_DELAYS)
-#define PCH_PP_DIVISOR			_MMIO(_PCH_PP_DIVISOR)
-
-/* BXT PPS changes - 2nd set of PPS registers */
-#define _BXT_PP_STATUS2 	0xc7300
-#define _BXT_PP_CONTROL2 	0xc7304
-#define _BXT_PP_ON_DELAYS2	0xc7308
-#define _BXT_PP_OFF_DELAYS2	0xc730c
-
-#define BXT_PP_STATUS(n)	_MMIO_PIPE(n, _PCH_PP_STATUS, _BXT_PP_STATUS2)
-#define BXT_PP_CONTROL(n)	_MMIO_PIPE(n, _PCH_PP_CONTROL, _BXT_PP_CONTROL2)
-#define BXT_PP_ON_DELAYS(n)	_MMIO_PIPE(n, _PCH_PP_ON_DELAYS, _BXT_PP_ON_DELAYS2)
-#define BXT_PP_OFF_DELAYS(n)	_MMIO_PIPE(n, _PCH_PP_OFF_DELAYS, _BXT_PP_OFF_DELAYS2)
-
 #define _PCH_DP_B		0xe4100
 #define PCH_DP_B		_MMIO(_PCH_DP_B)
 #define _PCH_DPB_AUX_CH_CTL	0xe4110
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 5cfe4c7716b4..4f272777b4f4 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -37,25 +37,6 @@ static void i915_save_display(struct drm_device *dev)
 	if (INTEL_INFO(dev)->gen <= 4)
 		dev_priv->regfile.saveDSPARB = I915_READ(DSPARB);
 
-	/* LVDS state */
-	if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev))
-		dev_priv->regfile.saveLVDS = I915_READ(PCH_LVDS);
-	else if (INTEL_INFO(dev)->gen <= 4 && IS_MOBILE(dev) && !IS_I830(dev))
-		dev_priv->regfile.saveLVDS = I915_READ(LVDS);
-
-	/* Panel power sequencer */
-	if (HAS_PCH_SPLIT(dev)) {
-		dev_priv->regfile.savePP_CONTROL = I915_READ(PCH_PP_CONTROL);
-		dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PCH_PP_ON_DELAYS);
-		dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PCH_PP_OFF_DELAYS);
-		dev_priv->regfile.savePP_DIVISOR = I915_READ(PCH_PP_DIVISOR);
-	} else if (INTEL_INFO(dev)->gen <= 4) {
-		dev_priv->regfile.savePP_CONTROL = I915_READ(PP_CONTROL);
-		dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PP_ON_DELAYS);
-		dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PP_OFF_DELAYS);
-		dev_priv->regfile.savePP_DIVISOR = I915_READ(PP_DIVISOR);
-	}
-
 	/* save FBC interval */
 	if (HAS_FBC(dev) && INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev))
 		dev_priv->regfile.saveFBC_CONTROL = I915_READ(FBC_CONTROL);
@@ -64,33 +45,11 @@ static void i915_save_display(struct drm_device *dev)
 static void i915_restore_display(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	u32 mask = 0xffffffff;
 
 	/* Display arbitration */
 	if (INTEL_INFO(dev)->gen <= 4)
 		I915_WRITE(DSPARB, dev_priv->regfile.saveDSPARB);
 
-	mask = ~LVDS_PORT_EN;
-
-	/* LVDS state */
-	if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev))
-		I915_WRITE(PCH_LVDS, dev_priv->regfile.saveLVDS & mask);
-	else if (INTEL_INFO(dev)->gen <= 4 && IS_MOBILE(dev) && !IS_I830(dev))
-		I915_WRITE(LVDS, dev_priv->regfile.saveLVDS & mask);
-
-	/* Panel power sequencer */
-	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(PCH_PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS);
-		I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS);
-		I915_WRITE(PCH_PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR);
-		I915_WRITE(PCH_PP_CONTROL, dev_priv->regfile.savePP_CONTROL);
-	} else if (INTEL_INFO(dev)->gen <= 4) {
-		I915_WRITE(PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS);
-		I915_WRITE(PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS);
-		I915_WRITE(PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR);
-		I915_WRITE(PP_CONTROL, dev_priv->regfile.savePP_CONTROL);
-	}
-
 	/* only restore FBC info on the platform that supports FBC*/
 	intel_fbc_global_disable(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 90867446f1a5..2491e4c1eaf0 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -26,6 +26,40 @@
 
 #include "i915_drv.h"
 
+static void intel_breadcrumbs_hangcheck(unsigned long data)
+{
+	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	if (!b->irq_enabled)
+		return;
+
+	if (time_before(jiffies, b->timeout)) {
+		mod_timer(&b->hangcheck, b->timeout);
+		return;
+	}
+
+	DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
+	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
+
+	/* Ensure that even if the GPU hangs, we get woken up.
+	 *
+	 * However, note that if no one is waiting, we never notice
+	 * a gpu hang. Eventually, we will have to wait for a resource
+	 * held by the GPU and so trigger a hangcheck. In the most
+	 * pathological case, this will be upon memory starvation! To
+	 * prevent this, we also queue the hangcheck from the retire
+	 * worker.
+	 */
+	i915_queue_hangcheck(engine->i915);
+}
+
+static unsigned long wait_timeout(void)
+{
+	return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
+}
+
 static void intel_breadcrumbs_fake_irq(unsigned long data)
 {
 	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
@@ -37,10 +71,8 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
 	 * every jiffie in order to kick the oldest waiter to do the
 	 * coherent seqno check.
 	 */
-	rcu_read_lock();
 	if (intel_engine_wakeup(engine))
 		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
-	rcu_read_unlock();
 }
 
 static void irq_enable(struct intel_engine_cs *engine)
@@ -51,13 +83,6 @@ static void irq_enable(struct intel_engine_cs *engine)
 	 */
 	engine->breadcrumbs.irq_posted = true;
 
-	/* Make sure the current hangcheck doesn't falsely accuse a just
-	 * started irq handler from missing an interrupt (because the
-	 * interrupt count still matches the stale value from when
-	 * the irq handler was disabled, many hangchecks ago).
-	 */
-	engine->breadcrumbs.irq_wakeups++;
-
 	spin_lock_irq(&engine->i915->irq_lock);
 	engine->irq_enable(engine);
 	spin_unlock_irq(&engine->i915->irq_lock);
@@ -98,17 +123,13 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
 	}
 
 	if (!b->irq_enabled ||
-	    test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
+	    test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
 		mod_timer(&b->fake_irq, jiffies + 1);
-
-	/* Ensure that even if the GPU hangs, we get woken up.
-	 *
-	 * However, note that if no one is waiting, we never notice
-	 * a gpu hang. Eventually, we will have to wait for a resource
-	 * held by the GPU and so trigger a hangcheck. In the most
-	 * pathological case, this will be upon memory starvation!
-	 */
-	i915_queue_hangcheck(i915);
+	} else {
+		/* Ensure we never sleep indefinitely */
+		GEM_BUG_ON(!time_after(b->timeout, jiffies));
+		mod_timer(&b->hangcheck, b->timeout);
+	}
 }
 
 static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
@@ -211,7 +232,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 	}
 	rb_link_node(&wait->node, parent, p);
 	rb_insert_color(&wait->node, &b->waiters);
-	GEM_BUG_ON(!first && !b->irq_seqno_bh);
+	GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh));
 
 	if (completed) {
 		struct rb_node *next = rb_next(completed);
@@ -219,8 +240,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 		GEM_BUG_ON(!next && !first);
 		if (next && next != &wait->node) {
 			GEM_BUG_ON(first);
+			b->timeout = wait_timeout();
 			b->first_wait = to_wait(next);
-			smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
+			rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
 			/* As there is a delay between reading the current
 			 * seqno, processing the completed tasks and selecting
 			 * the next waiter, we may have missed the interrupt
@@ -245,8 +267,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 
 	if (first) {
 		GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
+		b->timeout = wait_timeout();
 		b->first_wait = wait;
-		smp_store_mb(b->irq_seqno_bh, wait->tsk);
+		rcu_assign_pointer(b->irq_seqno_bh, wait->tsk);
 		/* After assigning ourselves as the new bottom-half, we must
 		 * perform a cursory check to prevent a missed interrupt.
 		 * Either we miss the interrupt whilst programming the hardware,
@@ -257,7 +280,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 		 */
 		__intel_breadcrumbs_enable_irq(b);
 	}
-	GEM_BUG_ON(!b->irq_seqno_bh);
+	GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh));
 	GEM_BUG_ON(!b->first_wait);
 	GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
 
@@ -277,11 +300,6 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
 	return first;
 }
 
-void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
-{
-	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
-}
-
 static inline bool chain_wakeup(struct rb_node *rb, int priority)
 {
 	return rb && to_wait(rb)->tsk->prio <= priority;
@@ -317,7 +335,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 		const int priority = wakeup_priority(b, wait->tsk);
 		struct rb_node *next;
 
-		GEM_BUG_ON(b->irq_seqno_bh != wait->tsk);
+		GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk);
 
 		/* We are the current bottom-half. Find the next candidate,
 		 * the first waiter in the queue on the remaining oldest
@@ -359,14 +377,15 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 			 * the interrupt, or if we have to handle an
 			 * exception rather than a seqno completion.
 			 */
+			b->timeout = wait_timeout();
 			b->first_wait = to_wait(next);
-			smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
+			rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
 			if (b->first_wait->seqno != wait->seqno)
 				__intel_breadcrumbs_enable_irq(b);
-			wake_up_process(b->irq_seqno_bh);
+			wake_up_process(b->first_wait->tsk);
 		} else {
 			b->first_wait = NULL;
-			WRITE_ONCE(b->irq_seqno_bh, NULL);
+			rcu_assign_pointer(b->irq_seqno_bh, NULL);
 			__intel_breadcrumbs_disable_irq(b);
 		}
 	} else {
@@ -380,7 +399,7 @@ out_unlock:
 	GEM_BUG_ON(b->first_wait == wait);
 	GEM_BUG_ON(rb_first(&b->waiters) !=
 		   (b->first_wait ? &b->first_wait->node : NULL));
-	GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters));
+	GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
 	spin_unlock(&b->lock);
 }
 
@@ -536,6 +555,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 	setup_timer(&b->fake_irq,
 		    intel_breadcrumbs_fake_irq,
 		    (unsigned long)engine);
+	setup_timer(&b->hangcheck,
+		    intel_breadcrumbs_hangcheck,
+		    (unsigned long)engine);
 
 	/* Spawn a thread to provide a common bottom-half for all signals.
 	 * As this is an asynchronous interface we cannot steal the current
@@ -560,6 +582,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 	if (!IS_ERR_OR_NULL(b->signaler))
 		kthread_stop(b->signaler);
 
+	del_timer_sync(&b->hangcheck);
 	del_timer_sync(&b->fake_irq);
 }
 
@@ -573,11 +596,9 @@ unsigned int intel_kick_waiters(struct drm_i915_private *i915)
 	 * RCU lock, i.e. as we call wake_up_process() we must be holding the
 	 * rcu_read_lock().
 	 */
-	rcu_read_lock();
 	for_each_engine(engine, i915)
 		if (unlikely(intel_engine_wakeup(engine)))
 			mask |= intel_engine_flag(engine);
-	rcu_read_unlock();
 
 	return mask;
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c6f27ab99e8f..c5c0c35d4f6e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1202,8 +1202,8 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv,
 	if (HAS_PCH_SPLIT(dev)) {
 		u32 port_sel;
 
-		pp_reg = PCH_PP_CONTROL;
-		port_sel = I915_READ(PCH_PP_ON_DELAYS) & PANEL_PORT_SELECT_MASK;
+		pp_reg = PP_CONTROL(0);
+		port_sel = I915_READ(PP_ON_DELAYS(0)) & PANEL_PORT_SELECT_MASK;
 
 		if (port_sel == PANEL_PORT_SELECT_LVDS &&
 		    I915_READ(PCH_LVDS) & LVDS_PIPEB_SELECT)
@@ -1211,10 +1211,10 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv,
 		/* XXX: else fix for eDP */
 	} else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
 		/* presumably write lock depends on pipe, not port select */
-		pp_reg = VLV_PIPE_PP_CONTROL(pipe);
+		pp_reg = PP_CONTROL(pipe);
 		panel_pipe = pipe;
 	} else {
-		pp_reg = PP_CONTROL;
+		pp_reg = PP_CONTROL(0);
 		if (I915_READ(LVDS) & LVDS_PIPEB_SELECT)
 			panel_pipe = PIPE_B;
 	}
@@ -1959,12 +1959,12 @@ static void intel_enable_pipe(struct intel_crtc *crtc)
 	 * a plane.  On ILK+ the pipe PLLs are integrated, so we don't
 	 * need the check.
 	 */
-	if (HAS_GMCH_DISPLAY(dev_priv))
+	if (HAS_GMCH_DISPLAY(dev_priv)) {
 		if (intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI))
 			assert_dsi_pll_enabled(dev_priv);
 		else
 			assert_pll_enabled(dev_priv, pipe);
-	else {
+	} else {
 		if (crtc->config->has_pch_encoder) {
 			/* if driving the PCH, we need FDI enabled */
 			assert_fdi_rx_pll_enabled(dev_priv, pch_transcoder);
@@ -2147,33 +2147,6 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view,
 	}
 }
 
-static void
-intel_fill_fb_info(struct drm_i915_private *dev_priv,
-		   struct drm_framebuffer *fb)
-{
-	struct intel_rotation_info *info = &to_intel_framebuffer(fb)->rot_info;
-	unsigned int tile_size, tile_width, tile_height, cpp;
-
-	tile_size = intel_tile_size(dev_priv);
-
-	cpp = drm_format_plane_cpp(fb->pixel_format, 0);
-	intel_tile_dims(dev_priv, &tile_width, &tile_height,
-			fb->modifier[0], cpp);
-
-	info->plane[0].width = DIV_ROUND_UP(fb->pitches[0], tile_width * cpp);
-	info->plane[0].height = DIV_ROUND_UP(fb->height, tile_height);
-
-	if (info->pixel_format == DRM_FORMAT_NV12) {
-		cpp = drm_format_plane_cpp(fb->pixel_format, 1);
-		intel_tile_dims(dev_priv, &tile_width, &tile_height,
-				fb->modifier[1], cpp);
-
-		info->uv_offset = fb->offsets[1];
-		info->plane[1].width = DIV_ROUND_UP(fb->pitches[1], tile_width * cpp);
-		info->plane[1].height = DIV_ROUND_UP(fb->height / 2, tile_height);
-	}
-}
-
 static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv)
 {
 	if (INTEL_INFO(dev_priv)->gen >= 9)
@@ -2294,21 +2267,67 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
 	i915_gem_object_unpin_from_display_plane(obj, &view);
 }
 
+static int intel_fb_pitch(const struct drm_framebuffer *fb, int plane,
+			  unsigned int rotation)
+{
+	if (intel_rotation_90_or_270(rotation))
+		return to_intel_framebuffer(fb)->rotated[plane].pitch;
+	else
+		return fb->pitches[plane];
+}
+
+/*
+ * Convert the x/y offsets into a linear offset.
+ * Only valid with 0/180 degree rotation, which is fine since linear
+ * offset is only used with linear buffers on pre-hsw and tiled buffers
+ * with gen2/3, and 90/270 degree rotations isn't supported on any of them.
+ */
+u32 intel_fb_xy_to_linear(int x, int y,
+			  const struct intel_plane_state *state,
+			  int plane)
+{
+	const struct drm_framebuffer *fb = state->base.fb;
+	unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
+	unsigned int pitch = fb->pitches[plane];
+
+	return y * pitch + x * cpp;
+}
+
+/*
+ * Add the x/y offsets derived from fb->offsets[] to the user
+ * specified plane src x/y offsets. The resulting x/y offsets
+ * specify the start of scanout from the beginning of the gtt mapping.
+ */
+void intel_add_fb_offsets(int *x, int *y,
+			  const struct intel_plane_state *state,
+			  int plane)
+
+{
+	const struct intel_framebuffer *intel_fb = to_intel_framebuffer(state->base.fb);
+	unsigned int rotation = state->base.rotation;
+
+	if (intel_rotation_90_or_270(rotation)) {
+		*x += intel_fb->rotated[plane].x;
+		*y += intel_fb->rotated[plane].y;
+	} else {
+		*x += intel_fb->normal[plane].x;
+		*y += intel_fb->normal[plane].y;
+	}
+}
+
 /*
- * Adjust the tile offset by moving the difference into
- * the x/y offsets.
- *
  * Input tile dimensions and pitch must already be
  * rotated to match x and y, and in pixel units.
  */
-static u32 intel_adjust_tile_offset(int *x, int *y,
-				    unsigned int tile_width,
-				    unsigned int tile_height,
-				    unsigned int tile_size,
-				    unsigned int pitch_tiles,
-				    u32 old_offset,
-				    u32 new_offset)
-{
+static u32 _intel_adjust_tile_offset(int *x, int *y,
+				     unsigned int tile_width,
+				     unsigned int tile_height,
+				     unsigned int tile_size,
+				     unsigned int pitch_tiles,
+				     u32 old_offset,
+				     u32 new_offset)
+{
+	unsigned int pitch_pixels = pitch_tiles * tile_width;
 	unsigned int tiles;
 
 	WARN_ON(old_offset & (tile_size - 1));
@@ -2320,6 +2339,54 @@ static u32 intel_adjust_tile_offset(int *x, int *y,
 	*y += tiles / pitch_tiles * tile_height;
 	*x += tiles % pitch_tiles * tile_width;
 
+	/* minimize x in case it got needlessly big */
+	*y += *x / pitch_pixels * tile_height;
+	*x %= pitch_pixels;
+
+	return new_offset;
+}
+
+/*
+ * Adjust the tile offset by moving the difference into
+ * the x/y offsets.
+ */
+static u32 intel_adjust_tile_offset(int *x, int *y,
+				    const struct intel_plane_state *state, int plane,
+				    u32 old_offset, u32 new_offset)
+{
+	const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev);
+	const struct drm_framebuffer *fb = state->base.fb;
+	unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
+	unsigned int rotation = state->base.rotation;
+	unsigned int pitch = intel_fb_pitch(fb, plane, rotation);
+
+	WARN_ON(new_offset > old_offset);
+
+	if (fb->modifier[plane] != DRM_FORMAT_MOD_NONE) {
+		unsigned int tile_size, tile_width, tile_height;
+		unsigned int pitch_tiles;
+
+		tile_size = intel_tile_size(dev_priv);
+		intel_tile_dims(dev_priv, &tile_width, &tile_height,
+				fb->modifier[plane], cpp);
+
+		if (intel_rotation_90_or_270(rotation)) {
+			pitch_tiles = pitch / tile_height;
+			swap(tile_width, tile_height);
+		} else {
+			pitch_tiles = pitch / (tile_width * cpp);
+		}
+
+		_intel_adjust_tile_offset(x, y, tile_width, tile_height,
+					  tile_size, pitch_tiles,
+					  old_offset, new_offset);
+	} else {
+		old_offset += *y * pitch + *x * cpp;
+
+		*y = (old_offset - new_offset) / pitch;
+		*x = ((old_offset - new_offset) - *y * pitch) / cpp;
+	}
+
 	return new_offset;
 }
 
@@ -2330,18 +2397,24 @@ static u32 intel_adjust_tile_offset(int *x, int *y,
  * In the 90/270 rotated case, x and y are assumed
  * to be already rotated to match the rotated GTT view, and
  * pitch is the tile_height aligned framebuffer height.
+ *
+ * This function is used when computing the derived information
+ * under intel_framebuffer, so using any of that information
+ * here is not allowed. Anything under drm_framebuffer can be
+ * used. This is why the user has to pass in the pitch since it
+ * is specified in the rotated orientation.
  */
-u32 intel_compute_tile_offset(int *x, int *y,
-			      const struct drm_framebuffer *fb, int plane,
-			      unsigned int pitch,
-			      unsigned int rotation)
+static u32 _intel_compute_tile_offset(const struct drm_i915_private *dev_priv,
+				      int *x, int *y,
+				      const struct drm_framebuffer *fb, int plane,
+				      unsigned int pitch,
+				      unsigned int rotation,
+				      u32 alignment)
 {
-	const struct drm_i915_private *dev_priv = to_i915(fb->dev);
 	uint64_t fb_modifier = fb->modifier[plane];
 	unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
-	u32 offset, offset_aligned, alignment;
+	u32 offset, offset_aligned;
 
-	alignment = intel_surf_alignment(dev_priv, fb_modifier);
 	if (alignment)
 		alignment--;
 
@@ -2369,9 +2442,9 @@ u32 intel_compute_tile_offset(int *x, int *y,
 		offset = (tile_rows * pitch_tiles + tiles) * tile_size;
 		offset_aligned = offset & ~alignment;
 
-		intel_adjust_tile_offset(x, y, tile_width, tile_height,
-					 tile_size, pitch_tiles,
-					 offset, offset_aligned);
+		_intel_adjust_tile_offset(x, y, tile_width, tile_height,
+					  tile_size, pitch_tiles,
+					  offset, offset_aligned);
 	} else {
 		offset = *y * pitch + *x * cpp;
 		offset_aligned = offset & ~alignment;
@@ -2383,6 +2456,177 @@ u32 intel_compute_tile_offset(int *x, int *y,
 	return offset_aligned;
 }
 
+u32 intel_compute_tile_offset(int *x, int *y,
+			      const struct intel_plane_state *state,
+			      int plane)
+{
+	const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev);
+	const struct drm_framebuffer *fb = state->base.fb;
+	unsigned int rotation = state->base.rotation;
+	int pitch = intel_fb_pitch(fb, plane, rotation);
+	u32 alignment;
+
+	/* AUX_DIST needs only 4K alignment */
+	if (fb->pixel_format == DRM_FORMAT_NV12 && plane == 1)
+		alignment = 4096;
+	else
+		alignment = intel_surf_alignment(dev_priv, fb->modifier[plane]);
+
+	return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch,
+					  rotation, alignment);
+}
+
+/* Convert the fb->offset[] linear offset into x/y offsets */
+static void intel_fb_offset_to_xy(int *x, int *y,
+				  const struct drm_framebuffer *fb, int plane)
+{
+	unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
+	unsigned int pitch = fb->pitches[plane];
+	u32 linear_offset = fb->offsets[plane];
+
+	*y = linear_offset / pitch;
+	*x = linear_offset % pitch / cpp;
+}
+
+static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier)
+{
+	switch (fb_modifier) {
+	case I915_FORMAT_MOD_X_TILED:
+		return I915_TILING_X;
+	case I915_FORMAT_MOD_Y_TILED:
+		return I915_TILING_Y;
+	default:
+		return I915_TILING_NONE;
+	}
+}
+
+static int
+intel_fill_fb_info(struct drm_i915_private *dev_priv,
+		   struct drm_framebuffer *fb)
+{
+	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+	struct intel_rotation_info *rot_info = &intel_fb->rot_info;
+	u32 gtt_offset_rotated = 0;
+	unsigned int max_size = 0;
+	uint32_t format = fb->pixel_format;
+	int i, num_planes = drm_format_num_planes(format);
+	unsigned int tile_size = intel_tile_size(dev_priv);
+
+	for (i = 0; i < num_planes; i++) {
+		unsigned int width, height;
+		unsigned int cpp, size;
+		u32 offset;
+		int x, y;
+
+		cpp = drm_format_plane_cpp(format, i);
+		width = drm_format_plane_width(fb->width, format, i);
+		height = drm_format_plane_height(fb->height, format, i);
+
+		intel_fb_offset_to_xy(&x, &y, fb, i);
+
+		/*
+		 * The fence (if used) is aligned to the start of the object
+		 * so having the framebuffer wrap around across the edge of the
+		 * fenced region doesn't really work. We have no API to configure
+		 * the fence start offset within the object (nor could we probably
+		 * on gen2/3). So it's just easier if we just require that the
+		 * fb layout agrees with the fence layout. We already check that the
+		 * fb stride matches the fence stride elsewhere.
+		 */
+		if (i915_gem_object_is_tiled(intel_fb->obj) &&
+		    (x + width) * cpp > fb->pitches[i]) {
+			DRM_DEBUG("bad fb plane %d offset: 0x%x\n",
+				  i, fb->offsets[i]);
+			return -EINVAL;
+		}
+
+		/*
+		 * First pixel of the framebuffer from
+		 * the start of the normal gtt mapping.
+		 */
+		intel_fb->normal[i].x = x;
+		intel_fb->normal[i].y = y;
+
+		offset = _intel_compute_tile_offset(dev_priv, &x, &y,
+						    fb, 0, fb->pitches[i],
+						    DRM_ROTATE_0, tile_size);
+		offset /= tile_size;
+
+		if (fb->modifier[i] != DRM_FORMAT_MOD_NONE) {
+			unsigned int tile_width, tile_height;
+			unsigned int pitch_tiles;
+			struct drm_rect r;
+
+			intel_tile_dims(dev_priv, &tile_width, &tile_height,
+					fb->modifier[i], cpp);
+
+			rot_info->plane[i].offset = offset;
+			rot_info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i], tile_width * cpp);
+			rot_info->plane[i].width = DIV_ROUND_UP(x + width, tile_width);
+			rot_info->plane[i].height = DIV_ROUND_UP(y + height, tile_height);
+
+			intel_fb->rotated[i].pitch =
+				rot_info->plane[i].height * tile_height;
+
+			/* how many tiles does this plane need */
+			size = rot_info->plane[i].stride * rot_info->plane[i].height;
+			/*
+			 * If the plane isn't horizontally tile aligned,
+			 * we need one more tile.
+			 */
+			if (x != 0)
+				size++;
+
+			/* rotate the x/y offsets to match the GTT view */
+			r.x1 = x;
+			r.y1 = y;
+			r.x2 = x + width;
+			r.y2 = y + height;
+			drm_rect_rotate(&r,
+					rot_info->plane[i].width * tile_width,
+					rot_info->plane[i].height * tile_height,
+					DRM_ROTATE_270);
+			x = r.x1;
+			y = r.y1;
+
+			/* rotate the tile dimensions to match the GTT view */
+			pitch_tiles = intel_fb->rotated[i].pitch / tile_height;
+			swap(tile_width, tile_height);
+
+			/*
+			 * We only keep the x/y offsets, so push all of the
+			 * gtt offset into the x/y offsets.
+			 */
+			_intel_adjust_tile_offset(&x, &y, tile_size,
+						  tile_width, tile_height, pitch_tiles,
+						  gtt_offset_rotated * tile_size, 0);
+
+			gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height;
+
+			/*
+			 * First pixel of the framebuffer from
+			 * the start of the rotated gtt mapping.
+			 */
+			intel_fb->rotated[i].x = x;
+			intel_fb->rotated[i].y = y;
+		} else {
+			size = DIV_ROUND_UP((y + height) * fb->pitches[i] +
+					    x * cpp, tile_size);
+		}
+
+		/* how many tiles in total needed in the bo */
+		max_size = max(max_size, offset + size);
+	}
+
+	if (max_size * tile_size > to_intel_framebuffer(fb)->obj->base.size) {
+		DRM_DEBUG("fb too big for bo (need %u bytes, have %zu bytes)\n",
+			  max_size * tile_size, to_intel_framebuffer(fb)->obj->base.size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int i9xx_format_to_fourcc(int format)
 {
 	switch (format) {
@@ -2604,6 +2848,169 @@ valid_fb:
 		  &obj->frontbuffer_bits);
 }
 
+static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane,
+			       unsigned int rotation)
+{
+	int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
+
+	switch (fb->modifier[plane]) {
+	case DRM_FORMAT_MOD_NONE:
+	case I915_FORMAT_MOD_X_TILED:
+		switch (cpp) {
+		case 8:
+			return 4096;
+		case 4:
+		case 2:
+		case 1:
+			return 8192;
+		default:
+			MISSING_CASE(cpp);
+			break;
+		}
+		break;
+	case I915_FORMAT_MOD_Y_TILED:
+	case I915_FORMAT_MOD_Yf_TILED:
+		switch (cpp) {
+		case 8:
+			return 2048;
+		case 4:
+			return 4096;
+		case 2:
+		case 1:
+			return 8192;
+		default:
+			MISSING_CASE(cpp);
+			break;
+		}
+		break;
+	default:
+		MISSING_CASE(fb->modifier[plane]);
+	}
+
+	return 2048;
+}
+
+static int skl_check_main_surface(struct intel_plane_state *plane_state)
+{
+	const struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	unsigned int rotation = plane_state->base.rotation;
+	int x = plane_state->base.src.x1 >> 16;
+	int y = plane_state->base.src.y1 >> 16;
+	int w = drm_rect_width(&plane_state->base.src) >> 16;
+	int h = drm_rect_height(&plane_state->base.src) >> 16;
+	int max_width = skl_max_plane_width(fb, 0, rotation);
+	int max_height = 4096;
+	u32 alignment, offset, aux_offset = plane_state->aux.offset;
+
+	if (w > max_width || h > max_height) {
+		DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n",
+			      w, h, max_width, max_height);
+		return -EINVAL;
+	}
+
+	intel_add_fb_offsets(&x, &y, plane_state, 0);
+	offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
+
+	alignment = intel_surf_alignment(dev_priv, fb->modifier[0]);
+
+	/*
+	 * AUX surface offset is specified as the distance from the
+	 * main surface offset, and it must be non-negative. Make
+	 * sure that is what we will get.
+	 */
+	if (offset > aux_offset)
+		offset = intel_adjust_tile_offset(&x, &y, plane_state, 0,
+						  offset, aux_offset & ~(alignment - 1));
+
+	/*
+	 * When using an X-tiled surface, the plane blows up
+	 * if the x offset + width exceed the stride.
+	 *
+	 * TODO: linear and Y-tiled seem fine, Yf untested,
+	 */
+	if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) {
+		int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
+
+		while ((x + w) * cpp > fb->pitches[0]) {
+			if (offset == 0) {
+				DRM_DEBUG_KMS("Unable to find suitable display surface offset\n");
+				return -EINVAL;
+			}
+
+			offset = intel_adjust_tile_offset(&x, &y, plane_state, 0,
+							  offset, offset - alignment);
+		}
+	}
+
+	plane_state->main.offset = offset;
+	plane_state->main.x = x;
+	plane_state->main.y = y;
+
+	return 0;
+}
+
+static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state)
+{
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	unsigned int rotation = plane_state->base.rotation;
+	int max_width = skl_max_plane_width(fb, 1, rotation);
+	int max_height = 4096;
+	int x = plane_state->base.src.x1 >> 17;
+	int y = plane_state->base.src.y1 >> 17;
+	int w = drm_rect_width(&plane_state->base.src) >> 17;
+	int h = drm_rect_height(&plane_state->base.src) >> 17;
+	u32 offset;
+
+	intel_add_fb_offsets(&x, &y, plane_state, 1);
+	offset = intel_compute_tile_offset(&x, &y, plane_state, 1);
+
+	/* FIXME not quite sure how/if these apply to the chroma plane */
+	if (w > max_width || h > max_height) {
+		DRM_DEBUG_KMS("CbCr source size %dx%d too big (limit %dx%d)\n",
+			      w, h, max_width, max_height);
+		return -EINVAL;
+	}
+
+	plane_state->aux.offset = offset;
+	plane_state->aux.x = x;
+	plane_state->aux.y = y;
+
+	return 0;
+}
+
+int skl_check_plane_surface(struct intel_plane_state *plane_state)
+{
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	unsigned int rotation = plane_state->base.rotation;
+	int ret;
+
+	/* Rotate src coordinates to match rotated GTT view */
+	if (intel_rotation_90_or_270(rotation))
+		drm_rect_rotate(&plane_state->base.src,
+				fb->width, fb->height, DRM_ROTATE_270);
+
+	/*
+	 * Handle the AUX surface first since
+	 * the main surface setup depends on it.
+	 */
+	if (fb->pixel_format == DRM_FORMAT_NV12) {
+		ret = skl_check_nv12_aux_surface(plane_state);
+		if (ret)
+			return ret;
+	} else {
+		plane_state->aux.offset = ~0xfff;
+		plane_state->aux.x = 0;
+		plane_state->aux.y = 0;
+	}
+
+	ret = skl_check_main_surface(plane_state);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static void i9xx_update_primary_plane(struct drm_plane *primary,
 				      const struct intel_crtc_state *crtc_state,
 				      const struct intel_plane_state *plane_state)
@@ -2618,7 +3025,6 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
 	u32 dspcntr;
 	i915_reg_t reg = DSPCNTR(plane);
 	unsigned int rotation = plane_state->base.rotation;
-	int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 	int x = plane_state->base.src.x1 >> 16;
 	int y = plane_state->base.src.y1 >> 16;
 
@@ -2671,36 +3077,31 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
 		BUG();
 	}
 
-	if (INTEL_INFO(dev)->gen >= 4 && i915_gem_object_is_tiled(obj))
+	if (INTEL_GEN(dev_priv) >= 4 &&
+	    fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
 		dspcntr |= DISPPLANE_TILED;
 
 	if (IS_G4X(dev))
 		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
 
-	linear_offset = y * fb->pitches[0] + x * cpp;
+	intel_add_fb_offsets(&x, &y, plane_state, 0);
 
-	if (INTEL_INFO(dev)->gen >= 4) {
+	if (INTEL_INFO(dev)->gen >= 4)
 		intel_crtc->dspaddr_offset =
-			intel_compute_tile_offset(&x, &y, fb, 0,
-						  fb->pitches[0], rotation);
-		linear_offset -= intel_crtc->dspaddr_offset;
-	} else {
-		intel_crtc->dspaddr_offset = linear_offset;
-	}
+			intel_compute_tile_offset(&x, &y, plane_state, 0);
 
 	if (rotation == DRM_ROTATE_180) {
 		dspcntr |= DISPPLANE_ROTATE_180;
 
 		x += (crtc_state->pipe_src_w - 1);
 		y += (crtc_state->pipe_src_h - 1);
-
-		/* Finding the last pixel of the last line of the display
-		data and adding to linear_offset*/
-		linear_offset +=
-			(crtc_state->pipe_src_h - 1) * fb->pitches[0] +
-			(crtc_state->pipe_src_w - 1) * cpp;
 	}
 
+	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
+
+	if (INTEL_INFO(dev)->gen < 4)
+		intel_crtc->dspaddr_offset = linear_offset;
+
 	intel_crtc->adjusted_x = x;
 	intel_crtc->adjusted_y = y;
 
@@ -2709,7 +3110,8 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
 	I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
 	if (INTEL_INFO(dev)->gen >= 4) {
 		I915_WRITE(DSPSURF(plane),
-			   i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
+			   intel_fb_gtt_offset(fb, rotation) +
+			   intel_crtc->dspaddr_offset);
 		I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
 		I915_WRITE(DSPLINOFF(plane), linear_offset);
 	} else
@@ -2741,13 +3143,11 @@ static void ironlake_update_primary_plane(struct drm_plane *primary,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
 	struct drm_framebuffer *fb = plane_state->base.fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	int plane = intel_crtc->plane;
 	u32 linear_offset;
 	u32 dspcntr;
 	i915_reg_t reg = DSPCNTR(plane);
 	unsigned int rotation = plane_state->base.rotation;
-	int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 	int x = plane_state->base.src.x1 >> 16;
 	int y = plane_state->base.src.y1 >> 16;
 
@@ -2780,32 +3180,28 @@ static void ironlake_update_primary_plane(struct drm_plane *primary,
 		BUG();
 	}
 
-	if (i915_gem_object_is_tiled(obj))
+	if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
 		dspcntr |= DISPPLANE_TILED;
 
 	if (!IS_HASWELL(dev) && !IS_BROADWELL(dev))
 		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
 
-	linear_offset = y * fb->pitches[0] + x * cpp;
+	intel_add_fb_offsets(&x, &y, plane_state, 0);
+
 	intel_crtc->dspaddr_offset =
-		intel_compute_tile_offset(&x, &y, fb, 0,
-					  fb->pitches[0], rotation);
-	linear_offset -= intel_crtc->dspaddr_offset;
+		intel_compute_tile_offset(&x, &y, plane_state, 0);
+
 	if (rotation == DRM_ROTATE_180) {
 		dspcntr |= DISPPLANE_ROTATE_180;
 
 		if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) {
 			x += (crtc_state->pipe_src_w - 1);
 			y += (crtc_state->pipe_src_h - 1);
-
-			/* Finding the last pixel of the last line of the display
-			data and adding to linear_offset*/
-			linear_offset +=
-				(crtc_state->pipe_src_h - 1) * fb->pitches[0] +
-				(crtc_state->pipe_src_w - 1) * cpp;
 		}
 	}
 
+	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
+
 	intel_crtc->adjusted_x = x;
 	intel_crtc->adjusted_y = y;
 
@@ -2813,7 +3209,8 @@ static void ironlake_update_primary_plane(struct drm_plane *primary,
 
 	I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
 	I915_WRITE(DSPSURF(plane),
-		   i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
+		   intel_fb_gtt_offset(fb, rotation) +
+		   intel_crtc->dspaddr_offset);
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
 		I915_WRITE(DSPOFFSET(plane), (y << 16) | x);
 	} else {
@@ -2835,28 +3232,16 @@ u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
 	}
 }
 
-u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
-			   struct drm_i915_gem_object *obj,
-			   unsigned int plane)
+u32 intel_fb_gtt_offset(struct drm_framebuffer *fb,
+			unsigned int rotation)
 {
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct i915_ggtt_view view;
-	struct i915_vma *vma;
 	u64 offset;
 
-	intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb,
-				intel_plane->base.state->rotation);
-
-	vma = i915_gem_obj_to_ggtt_view(obj, &view);
-	if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n",
-		view.type))
-		return -1;
-
-	offset = vma->node.start;
+	intel_fill_fb_ggtt_view(&view, fb, rotation);
 
-	if (plane == 1) {
-		offset += vma->ggtt_view.params.rotated.uv_start_page *
-			  PAGE_SIZE;
-	}
+	offset = i915_gem_obj_ggtt_offset_view(obj, &view);
 
 	WARN_ON(upper_32_bits(offset));
 
@@ -2890,6 +3275,28 @@ static void skl_detach_scalers(struct intel_crtc *intel_crtc)
 	}
 }
 
+u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
+		     unsigned int rotation)
+{
+	const struct drm_i915_private *dev_priv = to_i915(fb->dev);
+	u32 stride = intel_fb_pitch(fb, plane, rotation);
+
+	/*
+	 * The stride is either expressed as a multiple of 64 bytes chunks for
+	 * linear buffers or in number of tiles for tiled buffers.
+	 */
+	if (intel_rotation_90_or_270(rotation)) {
+		int cpp = drm_format_plane_cpp(fb->pixel_format, plane);
+
+		stride /= intel_tile_height(dev_priv, fb->modifier[0], cpp);
+	} else {
+		stride /= intel_fb_stride_alignment(dev_priv, fb->modifier[0],
+						    fb->pixel_format);
+	}
+
+	return stride;
+}
+
 u32 skl_plane_ctl_format(uint32_t pixel_format)
 {
 	switch (pixel_format) {
@@ -2979,16 +3386,14 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
 	struct drm_framebuffer *fb = plane_state->base.fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	int pipe = intel_crtc->pipe;
-	u32 plane_ctl, stride_div, stride;
-	u32 tile_height, plane_offset, plane_size;
+	u32 plane_ctl;
 	unsigned int rotation = plane_state->base.rotation;
-	int x_offset, y_offset;
-	u32 surf_addr;
+	u32 stride = skl_plane_stride(fb, 0, rotation);
+	u32 surf_addr = plane_state->main.offset;
 	int scaler_id = plane_state->scaler_id;
-	int src_x = plane_state->base.src.x1 >> 16;
-	int src_y = plane_state->base.src.y1 >> 16;
+	int src_x = plane_state->main.x;
+	int src_y = plane_state->main.y;
 	int src_w = drm_rect_width(&plane_state->base.src) >> 16;
 	int src_h = drm_rect_height(&plane_state->base.src) >> 16;
 	int dst_x = plane_state->base.dst.x1;
@@ -3005,36 +3410,19 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
 	plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE;
 	plane_ctl |= skl_plane_ctl_rotation(rotation);
 
-	stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0],
-					       fb->pixel_format);
-	surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj, 0);
+	/* Sizes are 0 based */
+	src_w--;
+	src_h--;
+	dst_w--;
+	dst_h--;
 
-	WARN_ON(drm_rect_width(&plane_state->base.src) == 0);
-
-	if (intel_rotation_90_or_270(rotation)) {
-		int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
-
-		/* stride = Surface height in tiles */
-		tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp);
-		stride = DIV_ROUND_UP(fb->height, tile_height);
-		x_offset = stride * tile_height - src_y - src_h;
-		y_offset = src_x;
-		plane_size = (src_w - 1) << 16 | (src_h - 1);
-	} else {
-		stride = fb->pitches[0] / stride_div;
-		x_offset = src_x;
-		y_offset = src_y;
-		plane_size = (src_h - 1) << 16 | (src_w - 1);
-	}
-	plane_offset = y_offset << 16 | x_offset;
-
-	intel_crtc->adjusted_x = x_offset;
-	intel_crtc->adjusted_y = y_offset;
+	intel_crtc->adjusted_x = src_x;
+	intel_crtc->adjusted_y = src_y;
 
 	I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl);
-	I915_WRITE(PLANE_OFFSET(pipe, 0), plane_offset);
-	I915_WRITE(PLANE_SIZE(pipe, 0), plane_size);
+	I915_WRITE(PLANE_OFFSET(pipe, 0), (src_y << 16) | src_x);
 	I915_WRITE(PLANE_STRIDE(pipe, 0), stride);
+	I915_WRITE(PLANE_SIZE(pipe, 0), (src_h << 16) | src_w);
 
 	if (scaler_id >= 0) {
 		uint32_t ps_ctrl = 0;
@@ -3051,7 +3439,8 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
 		I915_WRITE(PLANE_POS(pipe, 0), (dst_y << 16) | dst_x);
 	}
 
-	I915_WRITE(PLANE_SURF(pipe, 0), surf_addr);
+	I915_WRITE(PLANE_SURF(pipe, 0),
+		   intel_fb_gtt_offset(fb, rotation) + surf_addr);
 
 	POSTING_READ(PLANE_SURF(pipe, 0));
 }
@@ -3093,40 +3482,113 @@ static void intel_update_primary_planes(struct drm_device *dev)
 
 	for_each_crtc(dev, crtc) {
 		struct intel_plane *plane = to_intel_plane(crtc->primary);
-		struct intel_plane_state *plane_state;
-
-		drm_modeset_lock_crtc(crtc, &plane->base);
-		plane_state = to_intel_plane_state(plane->base.state);
+		struct intel_plane_state *plane_state =
+			to_intel_plane_state(plane->base.state);
 
 		if (plane_state->base.visible)
 			plane->update_plane(&plane->base,
 					    to_intel_crtc_state(crtc->state),
 					    plane_state);
+	}
+}
+
+static int
+__intel_display_resume(struct drm_device *dev,
+		       struct drm_atomic_state *state)
+{
+	struct drm_crtc_state *crtc_state;
+	struct drm_crtc *crtc;
+	int i, ret;
 
-		drm_modeset_unlock_crtc(crtc);
+	intel_modeset_setup_hw_state(dev);
+	i915_redisable_vga(dev);
+
+	if (!state)
+		return 0;
+
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		/*
+		 * Force recalculation even if we restore
+		 * current state. With fast modeset this may not result
+		 * in a modeset when the state is compatible.
+		 */
+		crtc_state->mode_changed = true;
 	}
+
+	/* ignore any reset values/BIOS leftovers in the WM registers */
+	to_intel_atomic_state(state)->skip_intermediate_wm = true;
+
+	ret = drm_atomic_commit(state);
+
+	WARN_ON(ret == -EDEADLK);
+	return ret;
+}
+
+static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
+{
+	return intel_has_gpu_reset(dev_priv) &&
+		INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv);
 }
 
 void intel_prepare_reset(struct drm_i915_private *dev_priv)
 {
-	/* no reset support for gen2 */
-	if (IS_GEN2(dev_priv))
-		return;
+	struct drm_device *dev = &dev_priv->drm;
+	struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
+	struct drm_atomic_state *state;
+	int ret;
 
-	/* reset doesn't touch the display */
-	if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))
+	/*
+	 * Need mode_config.mutex so that we don't
+	 * trample ongoing ->detect() and whatnot.
+	 */
+	mutex_lock(&dev->mode_config.mutex);
+	drm_modeset_acquire_init(ctx, 0);
+	while (1) {
+		ret = drm_modeset_lock_all_ctx(dev, ctx);
+		if (ret != -EDEADLK)
+			break;
+
+		drm_modeset_backoff(ctx);
+	}
+
+	/* reset doesn't touch the display, but flips might get nuked anyway, */
+	if (!i915.force_reset_modeset_test &&
+	    !gpu_reset_clobbers_display(dev_priv))
 		return;
 
-	drm_modeset_lock_all(&dev_priv->drm);
 	/*
 	 * Disabling the crtcs gracefully seems nicer. Also the
 	 * g33 docs say we should at least disable all the planes.
 	 */
-	intel_display_suspend(&dev_priv->drm);
+	state = drm_atomic_helper_duplicate_state(dev, ctx);
+	if (IS_ERR(state)) {
+		ret = PTR_ERR(state);
+		state = NULL;
+		DRM_ERROR("Duplicating state failed with %i\n", ret);
+		goto err;
+	}
+
+	ret = drm_atomic_helper_disable_all(dev, ctx);
+	if (ret) {
+		DRM_ERROR("Suspending crtc's failed with %i\n", ret);
+		goto err;
+	}
+
+	dev_priv->modeset_restore_state = state;
+	state->acquire_ctx = ctx;
+	return;
+
+err:
+	drm_atomic_state_free(state);
 }
 
 void intel_finish_reset(struct drm_i915_private *dev_priv)
 {
+	struct drm_device *dev = &dev_priv->drm;
+	struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
+	struct drm_atomic_state *state = dev_priv->modeset_restore_state;
+	int ret;
+
 	/*
 	 * Flips in the rings will be nuked by the reset,
 	 * so complete all pending flips so that user space
@@ -3134,44 +3596,51 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 	 */
 	intel_complete_page_flips(dev_priv);
 
-	/* no reset support for gen2 */
-	if (IS_GEN2(dev_priv))
-		return;
+	dev_priv->modeset_restore_state = NULL;
 
 	/* reset doesn't touch the display */
-	if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
+	if (!gpu_reset_clobbers_display(dev_priv)) {
+		if (!state) {
+			/*
+			 * Flips in the rings have been nuked by the reset,
+			 * so update the base address of all primary
+			 * planes to the the last fb to make sure we're
+			 * showing the correct fb after a reset.
+			 *
+			 * FIXME: Atomic will make this obsolete since we won't schedule
+			 * CS-based flips (which might get lost in gpu resets) any more.
+			 */
+			intel_update_primary_planes(dev);
+		} else {
+			ret = __intel_display_resume(dev, state);
+			if (ret)
+				DRM_ERROR("Restoring old state failed with %i\n", ret);
+		}
+	} else {
 		/*
-		 * Flips in the rings have been nuked by the reset,
-		 * so update the base address of all primary
-		 * planes to the the last fb to make sure we're
-		 * showing the correct fb after a reset.
-		 *
-		 * FIXME: Atomic will make this obsolete since we won't schedule
-		 * CS-based flips (which might get lost in gpu resets) any more.
+		 * The display has been reset as well,
+		 * so need a full re-initialization.
 		 */
-		intel_update_primary_planes(&dev_priv->drm);
-		return;
-	}
+		intel_runtime_pm_disable_interrupts(dev_priv);
+		intel_runtime_pm_enable_interrupts(dev_priv);
 
-	/*
-	 * The display has been reset as well,
-	 * so need a full re-initialization.
-	 */
-	intel_runtime_pm_disable_interrupts(dev_priv);
-	intel_runtime_pm_enable_interrupts(dev_priv);
-
-	intel_modeset_init_hw(&dev_priv->drm);
+		intel_modeset_init_hw(dev);
 
-	spin_lock_irq(&dev_priv->irq_lock);
-	if (dev_priv->display.hpd_irq_setup)
-		dev_priv->display.hpd_irq_setup(dev_priv);
-	spin_unlock_irq(&dev_priv->irq_lock);
+		spin_lock_irq(&dev_priv->irq_lock);
+		if (dev_priv->display.hpd_irq_setup)
+			dev_priv->display.hpd_irq_setup(dev_priv);
+		spin_unlock_irq(&dev_priv->irq_lock);
 
-	intel_display_resume(&dev_priv->drm);
+		ret = __intel_display_resume(dev, state);
+		if (ret)
+			DRM_ERROR("Restoring old state failed with %i\n", ret);
 
-	intel_hpd_init(dev_priv);
+		intel_hpd_init(dev_priv);
+	}
 
-	drm_modeset_unlock_all(&dev_priv->drm);
+	drm_modeset_drop_locks(ctx);
+	drm_modeset_acquire_fini(ctx);
+	mutex_unlock(&dev->mode_config.mutex);
 }
 
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
@@ -9411,7 +9880,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
 	I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL enabled\n");
 	I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n");
 	I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n");
-	I915_STATE_WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n");
+	I915_STATE_WARN(I915_READ(PP_STATUS(0)) & PP_ON, "Panel power on\n");
 	I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
 	     "CPU PWM1 enabled\n");
 	if (IS_HASWELL(dev))
@@ -11198,7 +11667,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
 			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 	intel_ring_emit(ring, fb->pitches[0]);
 	intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset |
-			i915_gem_object_get_tiling(obj));
+			intel_fb_modifier_to_tiling(fb->modifier[0]));
 
 	/* XXX Enabling the panel-fitter across page-flip is so far
 	 * untested on non-native modes, so ignore it for now.
@@ -11230,7 +11699,8 @@ static int intel_gen6_queue_flip(struct drm_device *dev,
 
 	intel_ring_emit(ring, MI_DISPLAY_FLIP |
 			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
-	intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj));
+	intel_ring_emit(ring, fb->pitches[0] |
+			intel_fb_modifier_to_tiling(fb->modifier[0]));
 	intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
 
 	/* Contrary to the suggestions in the documentation,
@@ -11333,7 +11803,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	}
 
 	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
-	intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj));
+	intel_ring_emit(ring, fb->pitches[0] |
+			intel_fb_modifier_to_tiling(fb->modifier[0]));
 	intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset);
 	intel_ring_emit(ring, (MI_NOOP));
 
@@ -11382,7 +11853,7 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
 	const enum pipe pipe = intel_crtc->pipe;
-	u32 ctl, stride, tile_height;
+	u32 ctl, stride = skl_plane_stride(fb, 0, rotation);
 
 	ctl = I915_READ(PLANE_CTL(pipe, 0));
 	ctl &= ~PLANE_CTL_TILED_MASK;
@@ -11403,20 +11874,6 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
 	}
 
 	/*
-	 * The stride is either expressed as a multiple of 64 bytes chunks for
-	 * linear buffers or in number of tiles for tiled buffers.
-	 */
-	if (intel_rotation_90_or_270(rotation)) {
-		/* stride = Surface height in tiles */
-		tile_height = intel_tile_height(dev_priv, fb->modifier[0], 0);
-		stride = DIV_ROUND_UP(fb->height, tile_height);
-	} else {
-		stride = fb->pitches[0] /
-			intel_fb_stride_alignment(dev_priv, fb->modifier[0],
-						  fb->pixel_format);
-	}
-
-	/*
 	 * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on
 	 * PLANE_SURF updates, the update is then guaranteed to be atomic.
 	 */
@@ -11432,15 +11889,13 @@ static void ilk_do_mmio_flip(struct intel_crtc *intel_crtc,
 {
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_framebuffer *intel_fb =
-		to_intel_framebuffer(intel_crtc->base.primary->fb);
-	struct drm_i915_gem_object *obj = intel_fb->obj;
+	struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
 	i915_reg_t reg = DSPCNTR(intel_crtc->plane);
 	u32 dspcntr;
 
 	dspcntr = I915_READ(reg);
 
-	if (i915_gem_object_is_tiled(obj))
+	if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
 		dspcntr |= DISPPLANE_TILED;
 	else
 		dspcntr &= ~DISPPLANE_TILED;
@@ -11668,8 +12123,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 
 	if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
 		engine = &dev_priv->engine[BCS];
-		if (i915_gem_object_get_tiling(obj) !=
-		    i915_gem_object_get_tiling(intel_fb_obj(work->old_fb)))
+		if (fb->modifier[0] != old_fb->modifier[0])
 			/* vlv: DISPLAY_FLIP fails to change tiling */
 			engine = NULL;
 	} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
@@ -11689,8 +12143,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	if (ret)
 		goto cleanup_pending;
 
-	work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary),
-						  obj, 0);
+	work->gtt_offset = intel_fb_gtt_offset(fb, primary->state->rotation);
 	work->gtt_offset += intel_crtc->dspaddr_offset;
 	work->rotation = crtc->primary->state->rotation;
 
@@ -14106,12 +14559,14 @@ intel_check_primary_plane(struct drm_plane *plane,
 			  struct intel_crtc_state *crtc_state,
 			  struct intel_plane_state *state)
 {
+	struct drm_i915_private *dev_priv = to_i915(plane->dev);
 	struct drm_crtc *crtc = state->base.crtc;
 	int min_scale = DRM_PLANE_HELPER_NO_SCALING;
 	int max_scale = DRM_PLANE_HELPER_NO_SCALING;
 	bool can_position = false;
+	int ret;
 
-	if (INTEL_INFO(plane->dev)->gen >= 9) {
+	if (INTEL_GEN(dev_priv) >= 9) {
 		/* use scaler when colorkey is not required */
 		if (state->ckey.flags == I915_SET_COLORKEY_NONE) {
 			min_scale = 1;
@@ -14120,10 +14575,23 @@ intel_check_primary_plane(struct drm_plane *plane,
 		can_position = true;
 	}
 
-	return drm_plane_helper_check_state(&state->base,
-					    &state->clip,
-					    min_scale, max_scale,
-					    can_position, true);
+	ret = drm_plane_helper_check_state(&state->base,
+					   &state->clip,
+					   min_scale, max_scale,
+					   can_position, true);
+	if (ret)
+		return ret;
+
+	if (!state->base.fb)
+		return 0;
+
+	if (INTEL_GEN(dev_priv) >= 9) {
+		ret = skl_check_plane_surface(state);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static void intel_begin_crtc_commit(struct drm_crtc *crtc,
@@ -14635,12 +15103,50 @@ static bool intel_crt_present(struct drm_device *dev)
 	return true;
 }
 
+void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv)
+{
+	int pps_num;
+	int pps_idx;
+
+	if (HAS_DDI(dev_priv))
+		return;
+	/*
+	 * This w/a is needed at least on CPT/PPT, but to be sure apply it
+	 * everywhere where registers can be write protected.
+	 */
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		pps_num = 2;
+	else
+		pps_num = 1;
+
+	for (pps_idx = 0; pps_idx < pps_num; pps_idx++) {
+		u32 val = I915_READ(PP_CONTROL(pps_idx));
+
+		val = (val & ~PANEL_UNLOCK_MASK) | PANEL_UNLOCK_REGS;
+		I915_WRITE(PP_CONTROL(pps_idx), val);
+	}
+}
+
+static void intel_pps_init(struct drm_i915_private *dev_priv)
+{
+	if (HAS_PCH_SPLIT(dev_priv) || IS_BROXTON(dev_priv))
+		dev_priv->pps_mmio_base = PCH_PPS_BASE;
+	else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		dev_priv->pps_mmio_base = VLV_PPS_BASE;
+	else
+		dev_priv->pps_mmio_base = PPS_BASE;
+
+	intel_pps_unlock_regs_wa(dev_priv);
+}
+
 static void intel_setup_outputs(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_encoder *encoder;
 	bool dpd_is_edp = false;
 
+	intel_pps_init(dev_priv);
+
 	/*
 	 * intel_edp_init_connector() depends on this completing first, to
 	 * prevent the registeration of both eDP and LVDS and the incorrect
@@ -14908,24 +15414,26 @@ static int intel_framebuffer_init(struct drm_device *dev,
 				  struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	unsigned int aligned_height;
+	unsigned int tiling = i915_gem_object_get_tiling(obj);
 	int ret;
 	u32 pitch_limit, stride_alignment;
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
 	if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) {
-		/* Enforce that fb modifier and tiling mode match, but only for
-		 * X-tiled. This is needed for FBC. */
-		if (!!(i915_gem_object_get_tiling(obj) == I915_TILING_X) !=
-		    !!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) {
+		/*
+		 * If there's a fence, enforce that
+		 * the fb modifier and tiling mode match.
+		 */
+		if (tiling != I915_TILING_NONE &&
+		    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
 			DRM_DEBUG("tiling_mode doesn't match fb modifier\n");
 			return -EINVAL;
 		}
 	} else {
-		if (i915_gem_object_get_tiling(obj) == I915_TILING_X)
+		if (tiling == I915_TILING_X) {
 			mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED;
-		else if (i915_gem_object_get_tiling(obj) == I915_TILING_Y) {
+		} else if (tiling == I915_TILING_Y) {
 			DRM_DEBUG("No Y tiling for legacy addfb\n");
 			return -EINVAL;
 		}
@@ -14949,6 +15457,16 @@ static int intel_framebuffer_init(struct drm_device *dev,
 		return -EINVAL;
 	}
 
+	/*
+	 * gen2/3 display engine uses the fence if present,
+	 * so the tiling mode must match the fb modifier exactly.
+	 */
+	if (INTEL_INFO(dev_priv)->gen < 4 &&
+	    tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) {
+		DRM_DEBUG("tiling_mode must match fb modifier exactly on gen2/3\n");
+		return -EINVAL;
+	}
+
 	stride_alignment = intel_fb_stride_alignment(dev_priv,
 						     mode_cmd->modifier[0],
 						     mode_cmd->pixel_format);
@@ -14968,7 +15486,11 @@ static int intel_framebuffer_init(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED &&
+	/*
+	 * If there's a fence, enforce that
+	 * the fb pitch and fence stride match.
+	 */
+	if (tiling != I915_TILING_NONE &&
 	    mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) {
 		DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n",
 			  mode_cmd->pitches[0],
@@ -15034,17 +15556,12 @@ static int intel_framebuffer_init(struct drm_device *dev,
 	if (mode_cmd->offsets[0] != 0)
 		return -EINVAL;
 
-	aligned_height = intel_fb_align_height(dev, mode_cmd->height,
-					       mode_cmd->pixel_format,
-					       mode_cmd->modifier[0]);
-	/* FIXME drm helper for size checks (especially planar formats)? */
-	if (obj->base.size < aligned_height * mode_cmd->pitches[0])
-		return -EINVAL;
-
 	drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd);
 	intel_fb->obj = obj;
 
-	intel_fill_fb_info(dev_priv, &intel_fb->base);
+	ret = intel_fill_fb_info(dev_priv, &intel_fb->base);
+	if (ret)
+		return ret;
 
 	ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
 	if (ret) {
@@ -15757,6 +16274,13 @@ static bool intel_encoder_has_connectors(struct intel_encoder *encoder)
 	return false;
 }
 
+static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
+			      enum transcoder pch_transcoder)
+{
+	return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) ||
+		(HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
+}
+
 static void intel_sanitize_crtc(struct intel_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
@@ -15835,7 +16359,17 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 		 * worst a fifo underrun happens which also sets this to false.
 		 */
 		crtc->cpu_fifo_underrun_disabled = true;
-		crtc->pch_fifo_underrun_disabled = true;
+		/*
+		 * We track the PCH trancoder underrun reporting state
+		 * within the crtc. With crtc for pipe A housing the underrun
+		 * reporting state for PCH transcoder A, crtc for pipe B housing
+		 * it for PCH transcoder B, etc. LPT-H has only PCH transcoder A,
+		 * and marking underrun reporting as disabled for the non-existing
+		 * PCH transcoders B and C would prevent enabling the south
+		 * error interrupt (see cpt_can_enable_serr_int()).
+		 */
+		if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe))
+			crtc->pch_fifo_underrun_disabled = true;
 	}
 }
 
@@ -16149,9 +16683,10 @@ void intel_display_resume(struct drm_device *dev)
 	struct drm_atomic_state *state = dev_priv->modeset_restore_state;
 	struct drm_modeset_acquire_ctx ctx;
 	int ret;
-	bool setup = false;
 
 	dev_priv->modeset_restore_state = NULL;
+	if (state)
+		state->acquire_ctx = &ctx;
 
 	/*
 	 * This is a cludge because with real atomic modeset mode_config.mutex
@@ -16162,43 +16697,17 @@ void intel_display_resume(struct drm_device *dev)
 	mutex_lock(&dev->mode_config.mutex);
 	drm_modeset_acquire_init(&ctx, 0);
 
-retry:
-	ret = drm_modeset_lock_all_ctx(dev, &ctx);
-
-	if (ret == 0 && !setup) {
-		setup = true;
-
-		intel_modeset_setup_hw_state(dev);
-		i915_redisable_vga(dev);
-	}
-
-	if (ret == 0 && state) {
-		struct drm_crtc_state *crtc_state;
-		struct drm_crtc *crtc;
-		int i;
-
-		state->acquire_ctx = &ctx;
-
-		/* ignore any reset values/BIOS leftovers in the WM registers */
-		to_intel_atomic_state(state)->skip_intermediate_wm = true;
-
-		for_each_crtc_in_state(state, crtc, crtc_state, i) {
-			/*
-			 * Force recalculation even if we restore
-			 * current state. With fast modeset this may not result
-			 * in a modeset when the state is compatible.
-			 */
-			crtc_state->mode_changed = true;
-		}
-
-		ret = drm_atomic_commit(state);
-	}
+	while (1) {
+		ret = drm_modeset_lock_all_ctx(dev, &ctx);
+		if (ret != -EDEADLK)
+			break;
 
-	if (ret == -EDEADLK) {
 		drm_modeset_backoff(&ctx);
-		goto retry;
 	}
 
+	if (!ret)
+		ret = __intel_display_resume(dev, state);
+
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
 	mutex_unlock(&dev->mode_config.mutex);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 8fe2afa5439e..364db908c191 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -256,6 +256,8 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev,
 static void
 intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev,
 					      struct intel_dp *intel_dp);
+static void
+intel_dp_pps_init(struct drm_device *dev, struct intel_dp *intel_dp);
 
 static void pps_lock(struct intel_dp *intel_dp)
 {
@@ -463,13 +465,13 @@ typedef bool (*vlv_pipe_check)(struct drm_i915_private *dev_priv,
 static bool vlv_pipe_has_pp_on(struct drm_i915_private *dev_priv,
 			       enum pipe pipe)
 {
-	return I915_READ(VLV_PIPE_PP_STATUS(pipe)) & PP_ON;
+	return I915_READ(PP_STATUS(pipe)) & PP_ON;
 }
 
 static bool vlv_pipe_has_vdd_on(struct drm_i915_private *dev_priv,
 				enum pipe pipe)
 {
-	return I915_READ(VLV_PIPE_PP_CONTROL(pipe)) & EDP_FORCE_VDD;
+	return I915_READ(PP_CONTROL(pipe)) & EDP_FORCE_VDD;
 }
 
 static bool vlv_pipe_any(struct drm_i915_private *dev_priv,
@@ -486,7 +488,7 @@ vlv_initial_pps_pipe(struct drm_i915_private *dev_priv,
 	enum pipe pipe;
 
 	for (pipe = PIPE_A; pipe <= PIPE_B; pipe++) {
-		u32 port_sel = I915_READ(VLV_PIPE_PP_ON_DELAYS(pipe)) &
+		u32 port_sel = I915_READ(PP_ON_DELAYS(pipe)) &
 			PANEL_PORT_SELECT_MASK;
 
 		if (port_sel != PANEL_PORT_SELECT_VLV(port))
@@ -583,30 +585,21 @@ static void intel_pps_get_registers(struct drm_i915_private *dev_priv,
 				    struct intel_dp *intel_dp,
 				    struct pps_registers *regs)
 {
+	int pps_idx = 0;
+
 	memset(regs, 0, sizeof(*regs));
 
-	if (IS_BROXTON(dev_priv)) {
-		int idx = bxt_power_sequencer_idx(intel_dp);
-
-		regs->pp_ctrl = BXT_PP_CONTROL(idx);
-		regs->pp_stat = BXT_PP_STATUS(idx);
-		regs->pp_on = BXT_PP_ON_DELAYS(idx);
-		regs->pp_off = BXT_PP_OFF_DELAYS(idx);
-	} else if (HAS_PCH_SPLIT(dev_priv)) {
-		regs->pp_ctrl = PCH_PP_CONTROL;
-		regs->pp_stat = PCH_PP_STATUS;
-		regs->pp_on = PCH_PP_ON_DELAYS;
-		regs->pp_off = PCH_PP_OFF_DELAYS;
-		regs->pp_div = PCH_PP_DIVISOR;
-	} else {
-		enum pipe pipe = vlv_power_sequencer_pipe(intel_dp);
+	if (IS_BROXTON(dev_priv))
+		pps_idx = bxt_power_sequencer_idx(intel_dp);
+	else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		pps_idx = vlv_power_sequencer_pipe(intel_dp);
 
-		regs->pp_ctrl = VLV_PIPE_PP_CONTROL(pipe);
-		regs->pp_stat = VLV_PIPE_PP_STATUS(pipe);
-		regs->pp_on = VLV_PIPE_PP_ON_DELAYS(pipe);
-		regs->pp_off = VLV_PIPE_PP_OFF_DELAYS(pipe);
-		regs->pp_div = VLV_PIPE_PP_DIVISOR(pipe);
-	}
+	regs->pp_ctrl = PP_CONTROL(pps_idx);
+	regs->pp_stat = PP_STATUS(pps_idx);
+	regs->pp_on = PP_ON_DELAYS(pps_idx);
+	regs->pp_off = PP_OFF_DELAYS(pps_idx);
+	if (!IS_BROXTON(dev_priv))
+		regs->pp_div = PP_DIVISOR(pps_idx);
 }
 
 static i915_reg_t
@@ -651,8 +644,8 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code,
 		i915_reg_t pp_ctrl_reg, pp_div_reg;
 		u32 pp_div;
 
-		pp_ctrl_reg = VLV_PIPE_PP_CONTROL(pipe);
-		pp_div_reg  = VLV_PIPE_PP_DIVISOR(pipe);
+		pp_ctrl_reg = PP_CONTROL(pipe);
+		pp_div_reg  = PP_DIVISOR(pipe);
 		pp_div = I915_READ(pp_div_reg);
 		pp_div &= PP_REFERENCE_DIVIDER_MASK;
 
@@ -1836,7 +1829,8 @@ static  u32 ironlake_get_pp_control(struct intel_dp *intel_dp)
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
 	control = I915_READ(_pp_ctrl_reg(intel_dp));
-	if (!IS_BROXTON(dev)) {
+	if (WARN_ON(!HAS_DDI(dev_priv) &&
+		    (control & PANEL_UNLOCK_MASK) != PANEL_UNLOCK_REGS)) {
 		control &= ~PANEL_UNLOCK_MASK;
 		control |= PANEL_UNLOCK_REGS;
 	}
@@ -1957,7 +1951,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
 	DRM_DEBUG_KMS("PP_STATUS: 0x%08x PP_CONTROL: 0x%08x\n",
 	I915_READ(pp_stat_reg), I915_READ(pp_ctrl_reg));
 
-	if ((pp & POWER_TARGET_ON) == 0)
+	if ((pp & PANEL_POWER_ON) == 0)
 		intel_dp->panel_power_off_time = ktime_get_boottime();
 
 	power_domain = intel_display_port_aux_power_domain(intel_encoder);
@@ -2044,7 +2038,7 @@ static void edp_panel_on(struct intel_dp *intel_dp)
 		POSTING_READ(pp_ctrl_reg);
 	}
 
-	pp |= POWER_TARGET_ON;
+	pp |= PANEL_POWER_ON;
 	if (!IS_GEN5(dev))
 		pp |= PANEL_POWER_RESET;
 
@@ -2096,7 +2090,7 @@ static void edp_panel_off(struct intel_dp *intel_dp)
 	pp = ironlake_get_pp_control(intel_dp);
 	/* We need to switch off panel power _and_ force vdd, for otherwise some
 	 * panels get very unhappy and cease to work. */
-	pp &= ~(POWER_TARGET_ON | PANEL_POWER_RESET | EDP_FORCE_VDD |
+	pp &= ~(PANEL_POWER_ON | PANEL_POWER_RESET | EDP_FORCE_VDD |
 		EDP_BLC_ENABLE);
 
 	pp_ctrl_reg = _pp_ctrl_reg(intel_dp);
@@ -2729,7 +2723,7 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp)
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
 	enum pipe pipe = intel_dp->pps_pipe;
-	i915_reg_t pp_on_reg = VLV_PIPE_PP_ON_DELAYS(pipe);
+	i915_reg_t pp_on_reg = PP_ON_DELAYS(pipe);
 
 	edp_panel_vdd_off_sync(intel_dp);
 
@@ -4666,13 +4660,8 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder)
 
 	pps_lock(intel_dp);
 
-	/*
-	 * Read out the current power sequencer assignment,
-	 * in case the BIOS did something with it.
-	 */
-	if (IS_VALLEYVIEW(encoder->dev) || IS_CHERRYVIEW(encoder->dev))
-		vlv_initial_power_sequencer_setup(intel_dp);
-
+	/* Reinit the power sequencer, in case BIOS did something with it. */
+	intel_dp_pps_init(encoder->dev, intel_dp);
 	intel_edp_panel_vdd_sanitize(intel_dp);
 
 	pps_unlock(intel_dp);
@@ -5020,6 +5009,17 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev,
 		      I915_READ(regs.pp_div));
 }
 
+static void intel_dp_pps_init(struct drm_device *dev,
+			      struct intel_dp *intel_dp)
+{
+	if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
+		vlv_initial_power_sequencer_setup(intel_dp);
+	} else {
+		intel_dp_init_panel_power_sequencer(dev, intel_dp);
+		intel_dp_init_panel_power_sequencer_registers(dev, intel_dp);
+	}
+}
+
 /**
  * intel_dp_set_drrs_state - program registers for RR switch to take effect
  * @dev: DRM device
@@ -5434,14 +5434,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
 	pps_lock(intel_dp);
 
 	intel_dp_init_panel_power_timestamps(intel_dp);
-
-	if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
-		vlv_initial_power_sequencer_setup(intel_dp);
-	} else {
-		intel_dp_init_panel_power_sequencer(dev, intel_dp);
-		intel_dp_init_panel_power_sequencer_registers(dev, intel_dp);
-	}
-
+	intel_dp_pps_init(dev, intel_dp);
 	intel_edp_panel_vdd_sanitize(intel_dp);
 
 	pps_unlock(intel_dp);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index c29a429cbc45..9539f0e63fa5 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -178,6 +178,16 @@ struct intel_framebuffer {
 	struct drm_framebuffer base;
 	struct drm_i915_gem_object *obj;
 	struct intel_rotation_info rot_info;
+
+	/* for each plane in the normal GTT view */
+	struct {
+		unsigned int x, y;
+	} normal[2];
+	/* for each plane in the rotated GTT view */
+	struct {
+		unsigned int x, y;
+		unsigned int pitch; /* pixels */
+	} rotated[2];
 };
 
 struct intel_fbdev {
@@ -340,6 +350,15 @@ struct intel_plane_state {
 	struct drm_plane_state base;
 	struct drm_rect clip;
 
+	struct {
+		u32 offset;
+		int x, y;
+	} main;
+	struct {
+		u32 offset;
+		int x, y;
+	} aux;
+
 	/*
 	 * scaler_id
 	 *    = -1 : not using a scaler
@@ -1153,12 +1172,18 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv,
 		      const char *name, u32 reg, int ref_freq);
 extern const struct drm_plane_funcs intel_plane_funcs;
 void intel_init_display_hooks(struct drm_i915_private *dev_priv);
+unsigned int intel_fb_xy_to_linear(int x, int y,
+				   const struct intel_plane_state *state,
+				   int plane);
+void intel_add_fb_offsets(int *x, int *y,
+			  const struct intel_plane_state *state, int plane);
 unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info);
 bool intel_has_pending_fb_unpin(struct drm_device *dev);
 void intel_mark_busy(struct drm_i915_private *dev_priv);
 void intel_mark_idle(struct drm_i915_private *dev_priv);
 void intel_crtc_restore_mode(struct drm_crtc *crtc);
 int intel_display_suspend(struct drm_device *dev);
+void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv);
 void intel_encoder_destroy(struct drm_encoder *encoder);
 int intel_connector_init(struct intel_connector *);
 struct intel_connector *intel_connector_alloc(void);
@@ -1277,9 +1302,7 @@ void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state);
 #define assert_pipe_enabled(d, p) assert_pipe(d, p, true)
 #define assert_pipe_disabled(d, p) assert_pipe(d, p, false)
 u32 intel_compute_tile_offset(int *x, int *y,
-			      const struct drm_framebuffer *fb, int plane,
-			      unsigned int pitch,
-			      unsigned int rotation);
+			      const struct intel_plane_state *state, int plane);
 void intel_prepare_reset(struct drm_i915_private *dev_priv);
 void intel_finish_reset(struct drm_i915_private *dev_priv);
 void hsw_enable_pc8(struct drm_i915_private *dev_priv);
@@ -1322,13 +1345,14 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode,
 int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
 int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state);
 
-u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
-			   struct drm_i915_gem_object *obj,
-			   unsigned int plane);
+u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, unsigned int rotation);
 
 u32 skl_plane_ctl_format(uint32_t pixel_format);
 u32 skl_plane_ctl_tiling(uint64_t fb_modifier);
 u32 skl_plane_ctl_rotation(unsigned int rotation);
+u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
+		     unsigned int rotation);
+int skl_check_plane_surface(struct intel_plane_state *plane_state);
 
 /* intel_csr.c */
 void intel_csr_ucode_init(struct drm_i915_private *);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e9b301ae2d0c..186c12d07f99 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -109,6 +109,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 int intel_engines_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
 	unsigned int mask = 0;
 	int (*init)(struct intel_engine_cs *engine);
 	unsigned int i;
@@ -142,11 +143,10 @@ int intel_engines_init(struct drm_device *dev)
 	 * are added to the driver by a warning and disabling the forgotten
 	 * engines.
 	 */
-	if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) {
-		struct intel_device_info *info =
-			(struct intel_device_info *)&dev_priv->info;
-		info->ring_mask = mask;
-	}
+	if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask))
+		device_info->ring_mask = mask;
+
+	device_info->num_rings = hweight32(mask);
 
 	return 0;
 
@@ -164,6 +164,7 @@ cleanup:
 void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
 {
 	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
+	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
 }
 
 static void intel_engine_init_requests(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 623cf26cd784..26f3d9c08120 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -67,22 +67,21 @@ struct i915_guc_client {
 	void *client_base;		/* first page (only) of above	*/
 	struct i915_gem_context *owner;
 	struct intel_guc *guc;
+
+	uint32_t engines;		/* bitmap of (host) engine ids	*/
 	uint32_t priority;
 	uint32_t ctx_index;
-
 	uint32_t proc_desc_offset;
+
 	uint32_t doorbell_offset;
 	uint32_t cookie;
 	uint16_t doorbell_id;
-	uint16_t padding;		/* Maintain alignment		*/
+	uint16_t padding[3];		/* Maintain alignment		*/
 
 	uint32_t wq_offset;
 	uint32_t wq_size;
 	uint32_t wq_tail;
-	uint32_t unused;		/* Was 'wq_head'		*/
-
 	uint32_t no_wq_space;
-	uint32_t q_fail;		/* No longer used		*/
 	uint32_t b_fail;
 	int retcode;
 
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 3763e30cc165..bfcf6b5d29ad 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -59,13 +59,25 @@
  *
  */
 
-#define I915_SKL_GUC_UCODE "i915/skl_guc_ver6_1.bin"
+#define SKL_FW_MAJOR 6
+#define SKL_FW_MINOR 1
+
+#define BXT_FW_MAJOR 8
+#define BXT_FW_MINOR 7
+
+#define KBL_FW_MAJOR 9
+#define KBL_FW_MINOR 14
+
+#define GUC_FW_PATH(platform, major, minor) \
+       "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin"
+
+#define I915_SKL_GUC_UCODE GUC_FW_PATH(skl, SKL_FW_MAJOR, SKL_FW_MINOR)
 MODULE_FIRMWARE(I915_SKL_GUC_UCODE);
 
-#define I915_BXT_GUC_UCODE "i915/bxt_guc_ver8_7.bin"
+#define I915_BXT_GUC_UCODE GUC_FW_PATH(bxt, BXT_FW_MAJOR, BXT_FW_MINOR)
 MODULE_FIRMWARE(I915_BXT_GUC_UCODE);
 
-#define I915_KBL_GUC_UCODE "i915/kbl_guc_ver9_14.bin"
+#define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR)
 MODULE_FIRMWARE(I915_KBL_GUC_UCODE);
 
 /* User-friendly representation of an enum */
@@ -697,16 +709,16 @@ void intel_guc_init(struct drm_device *dev)
 		fw_path = NULL;
 	} else if (IS_SKYLAKE(dev)) {
 		fw_path = I915_SKL_GUC_UCODE;
-		guc_fw->guc_fw_major_wanted = 6;
-		guc_fw->guc_fw_minor_wanted = 1;
+		guc_fw->guc_fw_major_wanted = SKL_FW_MAJOR;
+		guc_fw->guc_fw_minor_wanted = SKL_FW_MINOR;
 	} else if (IS_BROXTON(dev)) {
 		fw_path = I915_BXT_GUC_UCODE;
-		guc_fw->guc_fw_major_wanted = 8;
-		guc_fw->guc_fw_minor_wanted = 7;
+		guc_fw->guc_fw_major_wanted = BXT_FW_MAJOR;
+		guc_fw->guc_fw_minor_wanted = BXT_FW_MINOR;
 	} else if (IS_KABYLAKE(dev)) {
 		fw_path = I915_KBL_GUC_UCODE;
-		guc_fw->guc_fw_major_wanted = 9;
-		guc_fw->guc_fw_minor_wanted = 14;
+		guc_fw->guc_fw_major_wanted = KBL_FW_MAJOR;
+		guc_fw->guc_fw_minor_wanted = KBL_FW_MINOR;
 	} else {
 		fw_path = "";	/* unknown device */
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 309c5d9b1c57..c24ac39d51f6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -780,7 +780,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
 	if (ret)
 		goto err;
 
-	vaddr = i915_gem_object_pin_map(ce->state);
+	vaddr = i915_gem_object_pin_map(ce->state, I915_MAP_WB);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
 		goto unpin_ctx_obj;
@@ -1182,7 +1182,7 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size)
 	}
 
 	ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL,
-				       0, PAGE_SIZE, 0);
+				       0, PAGE_SIZE, PIN_HIGH);
 	if (ret) {
 		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
 				 ret);
@@ -1755,7 +1755,7 @@ lrc_setup_hws(struct intel_engine_cs *engine,
 	/* The HWSP is part of the default context object in LRC mode. */
 	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) +
 				       LRC_PPHWSP_PN * PAGE_SIZE;
-	hws = i915_gem_object_pin_map(dctx_obj);
+	hws = i915_gem_object_pin_map(dctx_obj, I915_MAP_WB);
 	if (IS_ERR(hws))
 		return PTR_ERR(hws);
 	engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
@@ -1968,7 +1968,7 @@ populate_lr_context(struct i915_gem_context *ctx,
 		return ret;
 	}
 
-	vaddr = i915_gem_object_pin_map(ctx_obj);
+	vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
 		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
@@ -2189,7 +2189,7 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv,
 		if (!ctx_obj)
 			continue;
 
-		vaddr = i915_gem_object_pin_map(ctx_obj);
+		vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
 		if (WARN_ON(IS_ERR(vaddr)))
 			continue;
 
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 49550470483e..668eabb0ba1b 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -48,6 +48,20 @@ struct intel_lvds_connector {
 	struct notifier_block lid_notifier;
 };
 
+struct intel_lvds_pps {
+	/* 100us units */
+	int t1_t2;
+	int t3;
+	int t4;
+	int t5;
+	int tx;
+
+	int divider;
+
+	int port;
+	bool powerdown_on_reset;
+};
+
 struct intel_lvds_encoder {
 	struct intel_encoder base;
 
@@ -55,6 +69,9 @@ struct intel_lvds_encoder {
 	i915_reg_t reg;
 	u32 a3_power;
 
+	struct intel_lvds_pps init_pps;
+	u32 init_lvds_val;
+
 	struct intel_lvds_connector *attached_connector;
 };
 
@@ -136,6 +153,83 @@ static void intel_lvds_get_config(struct intel_encoder *encoder,
 	pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock;
 }
 
+static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv,
+					struct intel_lvds_pps *pps)
+{
+	u32 val;
+
+	pps->powerdown_on_reset = I915_READ(PP_CONTROL(0)) & PANEL_POWER_RESET;
+
+	val = I915_READ(PP_ON_DELAYS(0));
+	pps->port = (val & PANEL_PORT_SELECT_MASK) >>
+		    PANEL_PORT_SELECT_SHIFT;
+	pps->t1_t2 = (val & PANEL_POWER_UP_DELAY_MASK) >>
+		     PANEL_POWER_UP_DELAY_SHIFT;
+	pps->t5 = (val & PANEL_LIGHT_ON_DELAY_MASK) >>
+		  PANEL_LIGHT_ON_DELAY_SHIFT;
+
+	val = I915_READ(PP_OFF_DELAYS(0));
+	pps->t3 = (val & PANEL_POWER_DOWN_DELAY_MASK) >>
+		  PANEL_POWER_DOWN_DELAY_SHIFT;
+	pps->tx = (val & PANEL_LIGHT_OFF_DELAY_MASK) >>
+		  PANEL_LIGHT_OFF_DELAY_SHIFT;
+
+	val = I915_READ(PP_DIVISOR(0));
+	pps->divider = (val & PP_REFERENCE_DIVIDER_MASK) >>
+		       PP_REFERENCE_DIVIDER_SHIFT;
+	val = (val & PANEL_POWER_CYCLE_DELAY_MASK) >>
+	      PANEL_POWER_CYCLE_DELAY_SHIFT;
+	/*
+	 * Remove the BSpec specified +1 (100ms) offset that accounts for a
+	 * too short power-cycle delay due to the asynchronous programming of
+	 * the register.
+	 */
+	if (val)
+		val--;
+	/* Convert from 100ms to 100us units */
+	pps->t4 = val * 1000;
+
+	if (INTEL_INFO(dev_priv)->gen <= 4 &&
+	    pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) {
+		DRM_DEBUG_KMS("Panel power timings uninitialized, "
+			      "setting defaults\n");
+		/* Set T2 to 40ms and T5 to 200ms in 100 usec units */
+		pps->t1_t2 = 40 * 10;
+		pps->t5 = 200 * 10;
+		/* Set T3 to 35ms and Tx to 200ms in 100 usec units */
+		pps->t3 = 35 * 10;
+		pps->tx = 200 * 10;
+	}
+
+	DRM_DEBUG_DRIVER("LVDS PPS:t1+t2 %d t3 %d t4 %d t5 %d tx %d "
+			 "divider %d port %d powerdown_on_reset %d\n",
+			 pps->t1_t2, pps->t3, pps->t4, pps->t5, pps->tx,
+			 pps->divider, pps->port, pps->powerdown_on_reset);
+}
+
+static void intel_lvds_pps_init_hw(struct drm_i915_private *dev_priv,
+				   struct intel_lvds_pps *pps)
+{
+	u32 val;
+
+	val = I915_READ(PP_CONTROL(0));
+	WARN_ON((val & PANEL_UNLOCK_MASK) != PANEL_UNLOCK_REGS);
+	if (pps->powerdown_on_reset)
+		val |= PANEL_POWER_RESET;
+	I915_WRITE(PP_CONTROL(0), val);
+
+	I915_WRITE(PP_ON_DELAYS(0), (pps->port << PANEL_PORT_SELECT_SHIFT) |
+				    (pps->t1_t2 << PANEL_POWER_UP_DELAY_SHIFT) |
+				    (pps->t5 << PANEL_LIGHT_ON_DELAY_SHIFT));
+	I915_WRITE(PP_OFF_DELAYS(0), (pps->t3 << PANEL_POWER_DOWN_DELAY_SHIFT) |
+				     (pps->tx << PANEL_LIGHT_OFF_DELAY_SHIFT));
+
+	val = pps->divider << PP_REFERENCE_DIVIDER_SHIFT;
+	val |= (DIV_ROUND_UP(pps->t4, 1000) + 1) <<
+	       PANEL_POWER_CYCLE_DELAY_SHIFT;
+	I915_WRITE(PP_DIVISOR(0), val);
+}
+
 static void intel_pre_enable_lvds(struct intel_encoder *encoder)
 {
 	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
@@ -154,7 +248,9 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder)
 		assert_pll_disabled(dev_priv, pipe);
 	}
 
-	temp = I915_READ(lvds_encoder->reg);
+	intel_lvds_pps_init_hw(dev_priv, &lvds_encoder->init_pps);
+
+	temp = lvds_encoder->init_lvds_val;
 	temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
 
 	if (HAS_PCH_CPT(dev)) {
@@ -217,21 +313,12 @@ static void intel_enable_lvds(struct intel_encoder *encoder)
 	struct intel_connector *intel_connector =
 		&lvds_encoder->attached_connector->base;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	i915_reg_t ctl_reg, stat_reg;
-
-	if (HAS_PCH_SPLIT(dev)) {
-		ctl_reg = PCH_PP_CONTROL;
-		stat_reg = PCH_PP_STATUS;
-	} else {
-		ctl_reg = PP_CONTROL;
-		stat_reg = PP_STATUS;
-	}
 
 	I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) | LVDS_PORT_EN);
 
-	I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON);
+	I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON);
 	POSTING_READ(lvds_encoder->reg);
-	if (intel_wait_for_register(dev_priv, stat_reg, PP_ON, PP_ON, 1000))
+	if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000))
 		DRM_ERROR("timed out waiting for panel to power on\n");
 
 	intel_panel_enable_backlight(intel_connector);
@@ -242,18 +329,9 @@ static void intel_disable_lvds(struct intel_encoder *encoder)
 	struct drm_device *dev = encoder->base.dev;
 	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	i915_reg_t ctl_reg, stat_reg;
-
-	if (HAS_PCH_SPLIT(dev)) {
-		ctl_reg = PCH_PP_CONTROL;
-		stat_reg = PCH_PP_STATUS;
-	} else {
-		ctl_reg = PP_CONTROL;
-		stat_reg = PP_STATUS;
-	}
 
-	I915_WRITE(ctl_reg, I915_READ(ctl_reg) & ~POWER_TARGET_ON);
-	if (intel_wait_for_register(dev_priv, stat_reg, PP_ON, 0, 1000))
+	I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) & ~PANEL_POWER_ON);
+	if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, 0, 1000))
 		DRM_ERROR("timed out waiting for panel to power off\n");
 
 	I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) & ~LVDS_PORT_EN);
@@ -900,17 +978,6 @@ void intel_lvds_init(struct drm_device *dev)
 	int pipe;
 	u8 pin;
 
-	/*
-	 * Unlock registers and just leave them unlocked. Do this before
-	 * checking quirk lists to avoid bogus WARNINGs.
-	 */
-	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(PCH_PP_CONTROL,
-			   I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS);
-	} else if (INTEL_INFO(dev_priv)->gen < 5) {
-		I915_WRITE(PP_CONTROL,
-			   I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS);
-	}
 	if (!intel_lvds_supported(dev))
 		return;
 
@@ -943,18 +1010,6 @@ void intel_lvds_init(struct drm_device *dev)
 		DRM_DEBUG_KMS("LVDS is not present in VBT, but enabled anyway\n");
 	}
 
-	 /* Set the Panel Power On/Off timings if uninitialized. */
-	if (INTEL_INFO(dev_priv)->gen < 5 &&
-	    I915_READ(PP_ON_DELAYS) == 0 && I915_READ(PP_OFF_DELAYS) == 0) {
-		/* Set T2 to 40ms and T5 to 200ms */
-		I915_WRITE(PP_ON_DELAYS, 0x019007d0);
-
-		/* Set T3 to 35ms and Tx to 200ms */
-		I915_WRITE(PP_OFF_DELAYS, 0x015e07d0);
-
-		DRM_DEBUG_KMS("Panel power timings uninitialized, setting defaults\n");
-	}
-
 	lvds_encoder = kzalloc(sizeof(*lvds_encoder), GFP_KERNEL);
 	if (!lvds_encoder)
 		return;
@@ -1020,6 +1075,10 @@ void intel_lvds_init(struct drm_device *dev)
 				      dev->mode_config.scaling_mode_property,
 				      DRM_MODE_SCALE_ASPECT);
 	intel_connector->panel.fitting_mode = DRM_MODE_SCALE_ASPECT;
+
+	intel_lvds_pps_get_hw_state(dev_priv, &lvds_encoder->init_pps);
+	lvds_encoder->init_lvds_val = lvds;
+
 	/*
 	 * LVDS discovery:
 	 * 1) check for EDID on DDC
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 81ab11934d85..99014d7821c2 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5675,8 +5675,6 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
 	u32 pcbr;
 	int pctx_size = 24*1024;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
 	pcbr = I915_READ(VLV_PCBR);
 	if (pcbr) {
 		/* BIOS set it up already, grab the pre-alloc'd space */
@@ -5712,7 +5710,6 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv)
 out:
 	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
 	dev_priv->vlv_pctx = pctx;
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 }
 
 static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
@@ -6488,6 +6485,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 		intel_runtime_pm_get(dev_priv);
 	}
 
+	mutex_lock(&dev_priv->drm.struct_mutex);
 	mutex_lock(&dev_priv->rps.hw_lock);
 
 	/* Initialize RPS limits (for userspace) */
@@ -6529,6 +6527,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 	dev_priv->rps.boost_freq = dev_priv->rps.max_freq;
 
 	mutex_unlock(&dev_priv->rps.hw_lock);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	intel_autoenable_gt_powersave(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e08a1e1b04e4..75a9f635c3dc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1317,7 +1317,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *req)
 	enum intel_engine_id id;
 	int ret, num_rings;
 
-	num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
+	num_rings = INTEL_INFO(dev_priv)->num_rings;
 	ret = intel_ring_begin(req, (num_rings-1) * 8);
 	if (ret)
 		return ret;
@@ -1354,7 +1354,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *req)
 	enum intel_engine_id id;
 	int ret, num_rings;
 
-	num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
+	num_rings = INTEL_INFO(dev_priv)->num_rings;
 	ret = intel_ring_begin(req, (num_rings-1) * 6);
 	if (ret)
 		return ret;
@@ -1389,7 +1389,7 @@ static int gen6_signal(struct drm_i915_gem_request *req)
 	enum intel_engine_id id;
 	int ret, num_rings;
 
-	num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
+	num_rings = INTEL_INFO(dev_priv)->num_rings;
 	ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2));
 	if (ret)
 		return ret;
@@ -1951,7 +1951,7 @@ int intel_ring_pin(struct intel_ring *ring)
 		if (ret)
 			goto err_unpin;
 
-		addr = i915_gem_object_pin_map(obj);
+		addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 		if (IS_ERR(addr)) {
 			ret = PTR_ERR(addr);
 			goto err_unpin;
@@ -2093,7 +2093,7 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
 
 	if (ce->state) {
 		ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
-					       ctx->ggtt_alignment, 0);
+					       ctx->ggtt_alignment, PIN_HIGH);
 		if (ret)
 			goto error;
 	}
@@ -2410,9 +2410,7 @@ void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
 	/* After manually advancing the seqno, fake the interrupt in case
 	 * there are any waiters for that seqno.
 	 */
-	rcu_read_lock();
 	intel_engine_wakeup(engine);
-	rcu_read_unlock();
 }
 
 static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
@@ -2631,7 +2629,8 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
 			i915.semaphores = 0;
 		} else {
 			i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
-			ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
+			ret = i915_gem_object_ggtt_pin(obj, NULL,
+						       0, 0, PIN_HIGH);
 			if (ret != 0) {
 				i915_gem_object_put(obj);
 				DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 43e545e44352..ea2735144b2a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -75,7 +75,6 @@ enum intel_engine_hangcheck_action {
 
 struct intel_engine_hangcheck {
 	u64 acthd;
-	unsigned long user_interrupts;
 	u32 seqno;
 	int score;
 	enum intel_engine_hangcheck_action action;
@@ -172,8 +171,7 @@ struct intel_engine_cs {
 	 * the overhead of waking that client is much preferred.
 	 */
 	struct intel_breadcrumbs {
-		struct task_struct *irq_seqno_bh; /* bh for user interrupts */
-		unsigned long irq_wakeups;
+		struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
 		bool irq_posted;
 
 		spinlock_t lock; /* protects the lists of requests */
@@ -183,6 +181,9 @@ struct intel_engine_cs {
 		struct task_struct *signaler; /* used for fence signalling */
 		struct drm_i915_gem_request *first_signal;
 		struct timer_list fake_irq; /* used after a missed interrupt */
+		struct timer_list hangcheck; /* detect missed interrupts */
+
+		unsigned long timeout;
 
 		bool irq_enabled : 1;
 		bool rpm_wakelock : 1;
@@ -310,7 +311,7 @@ struct intel_engine_cs {
 
 	/* An RCU guarded pointer to the last request. No reference is
 	 * held to the request, users must carefully acquire a reference to
-	 * the request using i915_gem_active_get_request_rcu(), or hold the
+	 * the request using i915_gem_active_get_rcu(), or hold the
 	 * struct_mutex.
 	 */
 	struct i915_gem_active last_request;
@@ -538,29 +539,35 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 			      struct intel_wait *wait);
 void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
 
-static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine)
+static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
 {
-	return READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
+	return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh);
 }
 
-static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
+static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
 {
 	bool wakeup = false;
-	struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
+
 	/* Note that for this not to dangerously chase a dangling pointer,
-	 * the caller is responsible for ensure that the task remain valid for
-	 * wake_up_process() i.e. that the RCU grace period cannot expire.
+	 * we must hold the rcu_read_lock here.
 	 *
 	 * Also note that tsk is likely to be in !TASK_RUNNING state so an
 	 * early test for tsk->state != TASK_RUNNING before wake_up_process()
 	 * is unlikely to be beneficial.
 	 */
-	if (tsk)
-		wakeup = wake_up_process(tsk);
+	if (intel_engine_has_waiter(engine)) {
+		struct task_struct *tsk;
+
+		rcu_read_lock();
+		tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
+		if (tsk)
+			wakeup = wake_up_process(tsk);
+		rcu_read_unlock();
+	}
+
 	return wakeup;
 }
 
-void intel_engine_enable_fake_irq(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 unsigned int intel_kick_waiters(struct drm_i915_private *i915);
 unsigned int intel_kick_signalers(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 1c603bbe5784..d659d6f5b8d3 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -592,6 +592,8 @@ void bxt_disable_dc9(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_KMS("Disabling DC9\n");
 
 	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	intel_pps_unlock_regs_wa(dev_priv);
 }
 
 static void assert_csr_loaded(struct drm_i915_private *dev_priv)
@@ -1121,6 +1123,8 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv)
 	}
 
 	i915_redisable_vga_power_on(&dev_priv->drm);
+
+	intel_pps_unlock_regs_wa(dev_priv);
 }
 
 static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index cbdca7e4d307..366900dcde34 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -203,21 +203,19 @@ skl_update_plane(struct drm_plane *drm_plane,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_plane *intel_plane = to_intel_plane(drm_plane);
 	struct drm_framebuffer *fb = plane_state->base.fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	const int pipe = intel_plane->pipe;
 	const int plane = intel_plane->plane + 1;
-	u32 plane_ctl, stride_div, stride;
+	u32 plane_ctl;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
-	u32 surf_addr;
-	u32 tile_height, plane_offset, plane_size;
+	u32 surf_addr = plane_state->main.offset;
 	unsigned int rotation = plane_state->base.rotation;
-	int x_offset, y_offset;
+	u32 stride = skl_plane_stride(fb, 0, rotation);
 	int crtc_x = plane_state->base.dst.x1;
 	int crtc_y = plane_state->base.dst.y1;
 	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
 	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
-	uint32_t x = plane_state->base.src.x1 >> 16;
-	uint32_t y = plane_state->base.src.y1 >> 16;
+	uint32_t x = plane_state->main.x;
+	uint32_t y = plane_state->main.y;
 	uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
 	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
 
@@ -230,15 +228,6 @@ skl_update_plane(struct drm_plane *drm_plane,
 
 	plane_ctl |= skl_plane_ctl_rotation(rotation);
 
-	stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0],
-					       fb->pixel_format);
-
-	/* Sizes are 0 based */
-	src_w--;
-	src_h--;
-	crtc_w--;
-	crtc_h--;
-
 	if (key->flags) {
 		I915_WRITE(PLANE_KEYVAL(pipe, plane), key->min_value);
 		I915_WRITE(PLANE_KEYMAX(pipe, plane), key->max_value);
@@ -250,28 +239,15 @@ skl_update_plane(struct drm_plane *drm_plane,
 	else if (key->flags & I915_SET_COLORKEY_SOURCE)
 		plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE;
 
-	surf_addr = intel_plane_obj_offset(intel_plane, obj, 0);
-
-	if (intel_rotation_90_or_270(rotation)) {
-		int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
-
-		/* stride: Surface height in tiles */
-		tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp);
-		stride = DIV_ROUND_UP(fb->height, tile_height);
-		plane_size = (src_w << 16) | src_h;
-		x_offset = stride * tile_height - y - (src_h + 1);
-		y_offset = x;
-	} else {
-		stride = fb->pitches[0] / stride_div;
-		plane_size = (src_h << 16) | src_w;
-		x_offset = x;
-		y_offset = y;
-	}
-	plane_offset = y_offset << 16 | x_offset;
+	/* Sizes are 0 based */
+	src_w--;
+	src_h--;
+	crtc_w--;
+	crtc_h--;
 
-	I915_WRITE(PLANE_OFFSET(pipe, plane), plane_offset);
+	I915_WRITE(PLANE_OFFSET(pipe, plane), (y << 16) | x);
 	I915_WRITE(PLANE_STRIDE(pipe, plane), stride);
-	I915_WRITE(PLANE_SIZE(pipe, plane), plane_size);
+	I915_WRITE(PLANE_SIZE(pipe, plane), (src_h << 16) | src_w);
 
 	/* program plane scaler */
 	if (plane_state->scaler_id >= 0) {
@@ -296,7 +272,8 @@ skl_update_plane(struct drm_plane *drm_plane,
 	}
 
 	I915_WRITE(PLANE_CTL(pipe, plane), plane_ctl);
-	I915_WRITE(PLANE_SURF(pipe, plane), surf_addr);
+	I915_WRITE(PLANE_SURF(pipe, plane),
+		   intel_fb_gtt_offset(fb, rotation) + surf_addr);
 	POSTING_READ(PLANE_SURF(pipe, plane));
 }
 
@@ -363,13 +340,11 @@ vlv_update_plane(struct drm_plane *dplane,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_plane *intel_plane = to_intel_plane(dplane);
 	struct drm_framebuffer *fb = plane_state->base.fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	int pipe = intel_plane->pipe;
 	int plane = intel_plane->plane;
 	u32 sprctl;
 	u32 sprsurf_offset, linear_offset;
 	unsigned int rotation = dplane->state->rotation;
-	int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 	int crtc_x = plane_state->base.dst.x1;
 	int crtc_y = plane_state->base.dst.y1;
@@ -431,7 +406,7 @@ vlv_update_plane(struct drm_plane *dplane,
 	 */
 	sprctl |= SP_GAMMA_ENABLE;
 
-	if (i915_gem_object_is_tiled(obj))
+	if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
 		sprctl |= SP_TILED;
 
 	/* Sizes are 0 based */
@@ -440,19 +415,18 @@ vlv_update_plane(struct drm_plane *dplane,
 	crtc_w--;
 	crtc_h--;
 
-	linear_offset = y * fb->pitches[0] + x * cpp;
-	sprsurf_offset = intel_compute_tile_offset(&x, &y, fb, 0,
-						   fb->pitches[0], rotation);
-	linear_offset -= sprsurf_offset;
+	intel_add_fb_offsets(&x, &y, plane_state, 0);
+	sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
 
 	if (rotation == DRM_ROTATE_180) {
 		sprctl |= SP_ROTATE_180;
 
 		x += src_w;
 		y += src_h;
-		linear_offset += src_h * fb->pitches[0] + src_w * cpp;
 	}
 
+	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
+
 	if (key->flags) {
 		I915_WRITE(SPKEYMINVAL(pipe, plane), key->min_value);
 		I915_WRITE(SPKEYMAXVAL(pipe, plane), key->max_value);
@@ -468,7 +442,7 @@ vlv_update_plane(struct drm_plane *dplane,
 	I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]);
 	I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x);
 
-	if (i915_gem_object_is_tiled(obj))
+	if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
 		I915_WRITE(SPTILEOFF(pipe, plane), (y << 16) | x);
 	else
 		I915_WRITE(SPLINOFF(pipe, plane), linear_offset);
@@ -477,8 +451,8 @@ vlv_update_plane(struct drm_plane *dplane,
 
 	I915_WRITE(SPSIZE(pipe, plane), (crtc_h << 16) | crtc_w);
 	I915_WRITE(SPCNTR(pipe, plane), sprctl);
-	I915_WRITE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) +
-		   sprsurf_offset);
+	I915_WRITE(SPSURF(pipe, plane),
+		   intel_fb_gtt_offset(fb, rotation) + sprsurf_offset);
 	POSTING_READ(SPSURF(pipe, plane));
 }
 
@@ -506,12 +480,10 @@ ivb_update_plane(struct drm_plane *plane,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct drm_framebuffer *fb = plane_state->base.fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	enum pipe pipe = intel_plane->pipe;
 	u32 sprctl, sprscale = 0;
 	u32 sprsurf_offset, linear_offset;
 	unsigned int rotation = plane_state->base.rotation;
-	int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 	int crtc_x = plane_state->base.dst.x1;
 	int crtc_y = plane_state->base.dst.y1;
@@ -553,7 +525,7 @@ ivb_update_plane(struct drm_plane *plane,
 	 */
 	sprctl |= SPRITE_GAMMA_ENABLE;
 
-	if (i915_gem_object_is_tiled(obj))
+	if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
 		sprctl |= SPRITE_TILED;
 
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
@@ -573,10 +545,8 @@ ivb_update_plane(struct drm_plane *plane,
 	if (crtc_w != src_w || crtc_h != src_h)
 		sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | src_h;
 
-	linear_offset = y * fb->pitches[0] + x * cpp;
-	sprsurf_offset = intel_compute_tile_offset(&x, &y, fb, 0,
-						   fb->pitches[0], rotation);
-	linear_offset -= sprsurf_offset;
+	intel_add_fb_offsets(&x, &y, plane_state, 0);
+	sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
 
 	if (rotation == DRM_ROTATE_180) {
 		sprctl |= SPRITE_ROTATE_180;
@@ -585,10 +555,11 @@ ivb_update_plane(struct drm_plane *plane,
 		if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) {
 			x += src_w;
 			y += src_h;
-			linear_offset += src_h * fb->pitches[0] + src_w * cpp;
 		}
 	}
 
+	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
+
 	if (key->flags) {
 		I915_WRITE(SPRKEYVAL(pipe), key->min_value);
 		I915_WRITE(SPRKEYMAX(pipe), key->max_value);
@@ -607,7 +578,7 @@ ivb_update_plane(struct drm_plane *plane,
 	 * register */
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		I915_WRITE(SPROFFSET(pipe), (y << 16) | x);
-	else if (i915_gem_object_is_tiled(obj))
+	else if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
 		I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x);
 	else
 		I915_WRITE(SPRLINOFF(pipe), linear_offset);
@@ -617,7 +588,7 @@ ivb_update_plane(struct drm_plane *plane,
 		I915_WRITE(SPRSCALE(pipe), sprscale);
 	I915_WRITE(SPRCTL(pipe), sprctl);
 	I915_WRITE(SPRSURF(pipe),
-		   i915_gem_obj_ggtt_offset(obj) + sprsurf_offset);
+		   intel_fb_gtt_offset(fb, rotation) + sprsurf_offset);
 	POSTING_READ(SPRSURF(pipe));
 }
 
@@ -647,12 +618,10 @@ ilk_update_plane(struct drm_plane *plane,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct drm_framebuffer *fb = plane_state->base.fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	int pipe = intel_plane->pipe;
 	u32 dvscntr, dvsscale;
 	u32 dvssurf_offset, linear_offset;
 	unsigned int rotation = plane_state->base.rotation;
-	int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 	int crtc_x = plane_state->base.dst.x1;
 	int crtc_y = plane_state->base.dst.y1;
@@ -694,7 +663,7 @@ ilk_update_plane(struct drm_plane *plane,
 	 */
 	dvscntr |= DVS_GAMMA_ENABLE;
 
-	if (i915_gem_object_is_tiled(obj))
+	if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
 		dvscntr |= DVS_TILED;
 
 	if (IS_GEN6(dev))
@@ -710,19 +679,18 @@ ilk_update_plane(struct drm_plane *plane,
 	if (crtc_w != src_w || crtc_h != src_h)
 		dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h;
 
-	linear_offset = y * fb->pitches[0] + x * cpp;
-	dvssurf_offset = intel_compute_tile_offset(&x, &y, fb, 0,
-						   fb->pitches[0], rotation);
-	linear_offset -= dvssurf_offset;
+	intel_add_fb_offsets(&x, &y, plane_state, 0);
+	dvssurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
 
 	if (rotation == DRM_ROTATE_180) {
 		dvscntr |= DVS_ROTATE_180;
 
 		x += src_w;
 		y += src_h;
-		linear_offset += src_h * fb->pitches[0] + src_w * cpp;
 	}
 
+	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
+
 	if (key->flags) {
 		I915_WRITE(DVSKEYVAL(pipe), key->min_value);
 		I915_WRITE(DVSKEYMAX(pipe), key->max_value);
@@ -737,7 +705,7 @@ ilk_update_plane(struct drm_plane *plane,
 	I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]);
 	I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x);
 
-	if (i915_gem_object_is_tiled(obj))
+	if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
 		I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x);
 	else
 		I915_WRITE(DVSLINOFF(pipe), linear_offset);
@@ -746,7 +714,7 @@ ilk_update_plane(struct drm_plane *plane,
 	I915_WRITE(DVSSCALE(pipe), dvsscale);
 	I915_WRITE(DVSCNTR(pipe), dvscntr);
 	I915_WRITE(DVSSURF(pipe),
-		   i915_gem_obj_ggtt_offset(obj) + dvssurf_offset);
+		   intel_fb_gtt_offset(fb, rotation) + dvssurf_offset);
 	POSTING_READ(DVSSURF(pipe));
 }
 
@@ -785,6 +753,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
 	int hscale, vscale;
 	int max_scale, min_scale;
 	bool can_scale;
+	int ret;
 
 	src->x1 = state->base.src_x;
 	src->y1 = state->base.src_y;
@@ -949,6 +918,12 @@ intel_check_sprite_plane(struct drm_plane *plane,
 	dst->y1 = crtc_y;
 	dst->y2 = crtc_y + crtc_h;
 
+	if (INTEL_GEN(dev) >= 9) {
+		ret = skl_check_plane_surface(state);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }