Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2111
1 file changed, 2111 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
new file mode 100644
index 000000000000..6056f558359f
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -0,0 +1,2111 @@
+/*
+ * drivers/video/tegra/host/gk20a/channel_gk20a.c
+ *
+ * GK20A Graphics channel
+ *
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/nvhost.h>
+#include <linux/list.h>
+#include <linux/delay.h>
+#include <linux/highmem.h> /* needed for nvmap.h */
+#include <trace/events/gk20a.h>
+#include <linux/scatterlist.h>
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <linux/dma-buf.h>
+
+#include "debug_gk20a.h"
+
+#include "gk20a.h"
+#include "dbg_gpu_gk20a.h"
+
+#include "hw_ram_gk20a.h"
+#include "hw_fifo_gk20a.h"
+#include "hw_pbdma_gk20a.h"
+#include "hw_ccsr_gk20a.h"
+#include "hw_ltc_gk20a.h"
+
+#define NVMAP_HANDLE_PARAM_SIZE 1
+
+static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f);
+static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
+
+static void free_priv_cmdbuf(struct channel_gk20a *c,
+			     struct priv_cmd_entry *e);
+static void recycle_priv_cmdbuf(struct channel_gk20a *c);
+
+static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
+static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
+
+static int channel_gk20a_commit_userd(struct channel_gk20a *c);
+static int channel_gk20a_setup_userd(struct channel_gk20a *c);
+static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
+			u64 gpfifo_base, u32 gpfifo_entries);
+
+static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);
+static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a);
+
+static int channel_gk20a_alloc_inst(struct gk20a *g,
+			struct channel_gk20a *ch);
+static void channel_gk20a_free_inst(struct gk20a *g,
+			struct channel_gk20a *ch);
+
+static int channel_gk20a_update_runlist(struct channel_gk20a *c,
+					bool add);
+static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
+
+static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f)
+{
+	struct channel_gk20a *ch = NULL;
+	int chid;
+
+	mutex_lock(&f->ch_inuse_mutex);
+	for (chid = 0; chid < f->num_channels; chid++) {
+		if (!f->channel[chid].in_use) {
+			f->channel[chid].in_use = true;
+			ch = &f->channel[chid];
+			break;
+		}
+	}
+	mutex_unlock(&f->ch_inuse_mutex);
+
+	return ch;
+}
+
+static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c)
+{
+	mutex_lock(&f->ch_inuse_mutex);
+	f->channel[c->hw_chid].in_use = false;
+	mutex_unlock(&f->ch_inuse_mutex);
+}
+
+int channel_gk20a_commit_va(struct channel_gk20a *c)
+{
+	u64 addr;
+	u32 addr_lo;
+	u32 addr_hi;
+	void *inst_ptr;
+
+	gk20a_dbg_fn("");
+
+	inst_ptr = c->inst_block.cpuva;
+	if (!inst_ptr)
+		return -ENOMEM;
+
+	addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
+	addr_lo = u64_lo32(addr >> 12);
+	addr_hi = u64_hi32(addr);
+
+	gk20a_dbg_info("pde pa=0x%llx addr_lo=0x%x addr_hi=0x%x",
+		   (u64)addr, addr_lo, addr_hi);
+
+	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+		ram_in_page_dir_base_target_vid_mem_f() |
+		ram_in_page_dir_base_vol_true_f() |
+		ram_in_page_dir_base_lo_f(addr_lo));
+
+	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+		ram_in_page_dir_base_hi_f(addr_hi));
+
+	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_lo_w(),
+		 u64_lo32(c->vm->va_limit) | 0xFFF);
+
+	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
+		ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
+
+	gk20a_mm_l2_invalidate(c->g);
+
+	return 0;
+}
+
+static int channel_gk20a_commit_userd(struct channel_gk20a *c)
+{
+	u32 addr_lo;
+	u32 addr_hi;
+	void *inst_ptr;
+
+	gk20a_dbg_fn("");
+
+	inst_ptr = c->inst_block.cpuva;
+	if (!inst_ptr)
+		return -ENOMEM;
+
+	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
+	addr_hi = u64_hi32(c->userd_iova);
+
+	gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
+		c->hw_chid, (u64)c->userd_iova);
+
+	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
+		 pbdma_userd_target_vid_mem_f() |
+		 pbdma_userd_addr_f(addr_lo));
+
+	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
+		 pbdma_userd_target_vid_mem_f() |
+		 pbdma_userd_hi_addr_f(addr_hi));
+
+	gk20a_mm_l2_invalidate(c->g);
+
+	return 0;
+}
+
+static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
+				u32 timeslice_timeout)
+{
+	void *inst_ptr;
+	int shift = 3;
+	int value = timeslice_timeout;
+
+	inst_ptr = c->inst_block.cpuva;
+	if (!inst_ptr)
+		return -ENOMEM;
+
+	/* disable channel */
+	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
+		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
+		ccsr_channel_enable_clr_true_f());
+
+	/* preempt the channel */
+	WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
+
+	/* flush GPU cache */
+	gk20a_mm_l2_flush(c->g, true);
+
+	/* value field is 8 bits long */
+	while (value >= 1 << 8) {
+		value >>= 1;
+		shift++;
+	}
+
+	/* time slice register is only 18 bits long */
+	if ((value << shift) >= 1 << 19) {
+		pr_err("Requested timeslice value is clamped to 18 bits\n");
+		value = 255;
+		shift = 10;
+	}
+
+	/* set new timeslice */
+	gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
+		value | (shift << 12) |
+		fifo_eng_timeslice_enable_true_f());
+
+	/* enable channel */
+	gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
+		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
+		ccsr_channel_enable_set_true_f());
+
+	gk20a_mm_l2_invalidate(c->g);
+
+	return 0;
+}
+
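The timeslice written above packs an 8-bit mantissa (`value`) and a timescale exponent (`shift`) into one register field; the `64 << 3 = 512us` comments in `gk20a_channel_set_priority` further down suggest the effective timeslice is roughly `value << shift` microseconds. A minimal standalone sketch of the same normalization loop (the helper name and the microsecond interpretation are mine, not the driver's):

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper mirroring channel_gk20a_set_schedule_params():
 * fold a requested timeslice into an 8-bit value plus timescale shift. */
static void encode_timeslice(uint32_t timeout, int *value, int *shift)
{
	int v = timeout, s = 3;		/* the driver starts from shift = 3 */

	while (v >= 1 << 8) {		/* value field is 8 bits long */
		v >>= 1;
		s++;
	}
	if ((v << s) >= 1 << 19) {	/* clamp, exactly as the driver does */
		v = 255;
		s = 10;
	}
	*value = v;
	*shift = s;
}

int main(void)
{
	int v, s;

	encode_timeslice(128, &v, &s);	/* NVHOST_PRIORITY_MEDIUM below */
	printf("value=%d shift=%d (~%d us)\n", v, s, v << s);	/* 128, 3, 1024 */
	return 0;
}
```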
+static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
+			u64 gpfifo_base, u32 gpfifo_entries)
+{
+	void *inst_ptr;
+
+	gk20a_dbg_fn("");
+
+	inst_ptr = c->inst_block.cpuva;
+	if (!inst_ptr)
+		return -ENOMEM;
+
+	memset(inst_ptr, 0, ram_fc_size_val_v());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
+		pbdma_gp_base_offset_f(
+		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
+		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
+		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
+		 pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
+		pbdma_formats_gp_fermi0_f() |
+		pbdma_formats_pb_fermi1_f() |
+		pbdma_formats_mp_fermi0_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
+		pbdma_pb_header_priv_user_f() |
+		pbdma_pb_header_method_zero_f() |
+		pbdma_pb_header_subchannel_zero_f() |
+		pbdma_pb_header_level_main_f() |
+		pbdma_pb_header_first_true_f() |
+		pbdma_pb_header_type_inc_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
+		pbdma_subdevice_id_f(1) |
+		pbdma_subdevice_status_active_f() |
+		pbdma_subdevice_channel_dma_enable_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
+		pbdma_acquire_retry_man_2_f() |
+		pbdma_acquire_retry_exp_2_f() |
+		pbdma_acquire_timeout_exp_max_f() |
+		pbdma_acquire_timeout_man_max_f() |
+		pbdma_acquire_timeout_en_disable_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_eng_timeslice_w(),
+		fifo_eng_timeslice_timeout_128_f() |
+		fifo_eng_timeslice_timescale_3_f() |
+		fifo_eng_timeslice_enable_true_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_pb_timeslice_w(),
+		fifo_pb_timeslice_timeout_16_f() |
+		fifo_pb_timeslice_timescale_0_f() |
+		fifo_pb_timeslice_enable_true_f());
+
+	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
+
+	/* TBD: always priv mode? */
+	gk20a_mem_wr32(inst_ptr, ram_fc_hce_ctrl_w(),
+		 pbdma_hce_ctrl_hce_priv_mode_yes_f());
+
+	gk20a_mm_l2_invalidate(c->g);
+
+	return 0;
+}
+
+static int channel_gk20a_setup_userd(struct channel_gk20a *c)
+{
+	BUG_ON(!c->userd_cpu_va);
+
+	gk20a_dbg_fn("");
+
+	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_w(), 0);
+	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_w(), 0);
+	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_w(), 0);
+	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_put_hi_w(), 0);
+	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_ref_threshold_w(), 0);
+	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_w(), 0);
+	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_top_level_get_hi_w(), 0);
+	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_get_hi_w(), 0);
+	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
+	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
+
+	gk20a_mm_l2_invalidate(c->g);
+
+	return 0;
+}
+
+static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a)
+{
+	struct gk20a *g = ch_gk20a->g;
+	struct fifo_gk20a *f = &g->fifo;
+	struct fifo_engine_info_gk20a *engine_info =
+		f->engine_info + ENGINE_GR_GK20A;
+
+	u32 inst_ptr = ch_gk20a->inst_block.cpu_pa
+		>> ram_in_base_shift_v();
+
+	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
+		ch_gk20a->hw_chid, inst_ptr);
+
+	ch_gk20a->bound = true;
+
+	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
+		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
+		 ~ccsr_channel_runlist_f(~0)) |
+		 ccsr_channel_runlist_f(engine_info->runlist_id));
+
+	gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
+		ccsr_channel_inst_ptr_f(inst_ptr) |
+		ccsr_channel_inst_target_vid_mem_f() |
+		ccsr_channel_inst_bind_true_f());
+
+	gk20a_writel(g, ccsr_channel_r(ch_gk20a->hw_chid),
+		(gk20a_readl(g, ccsr_channel_r(ch_gk20a->hw_chid)) &
+		 ~ccsr_channel_enable_set_f(~0)) |
+		 ccsr_channel_enable_set_true_f());
+}
+
+static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
+{
+	struct gk20a *g = ch_gk20a->g;
+
+	gk20a_dbg_fn("");
+
+	if (ch_gk20a->bound)
+		gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
+			ccsr_channel_inst_ptr_f(0) |
+			ccsr_channel_inst_bind_false_f());
+
+	ch_gk20a->bound = false;
+}
+
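`channel_gk20a_bind` packs the instance block's physical address into the bind register by dropping the alignment bits: `inst_ptr = cpu_pa >> ram_in_base_shift_v()`. Assuming the shift is 12 (4 KiB-aligned instance blocks; the real value comes from the generated hw_ram_gk20a.h header, not shown here), a quick round-trip check:

```c
#include <assert.h>
#include <stdint.h>

#define RAM_IN_BASE_SHIFT 12	/* assumed; see ram_in_base_shift_v() */

int main(void)
{
	uint64_t cpu_pa = 0x12345000ULL;	/* must be 4 KiB aligned */
	uint32_t inst_ptr = (uint32_t)(cpu_pa >> RAM_IN_BASE_SHIFT);

	assert(inst_ptr == 0x12345);
	/* the register stores only the shifted value; hardware re-appends
	 * the implicit zero bits, so the alignment must be guaranteed */
	assert(((uint64_t)inst_ptr << RAM_IN_BASE_SHIFT) == cpu_pa);
	return 0;
}
```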
+static int channel_gk20a_alloc_inst(struct gk20a *g,
+				struct channel_gk20a *ch)
+{
+	struct device *d = dev_from_gk20a(g);
+	int err = 0;
+	dma_addr_t iova;
+
+	gk20a_dbg_fn("");
+
+	ch->inst_block.size = ram_in_alloc_size_v();
+	ch->inst_block.cpuva = dma_alloc_coherent(d,
+					ch->inst_block.size,
+					&iova,
+					GFP_KERNEL);
+	if (!ch->inst_block.cpuva) {
+		gk20a_err(d, "%s: memory allocation failed\n", __func__);
+		err = -ENOMEM;
+		goto clean_up;
+	}
+
+	ch->inst_block.iova = iova;
+	ch->inst_block.cpu_pa = gk20a_get_phys_from_iova(d,
+							ch->inst_block.iova);
+	if (!ch->inst_block.cpu_pa) {
+		gk20a_err(d, "%s: failed to get physical address\n", __func__);
+		err = -ENOMEM;
+		goto clean_up;
+	}
+
+	gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
+		ch->hw_chid, (u64)ch->inst_block.cpu_pa);
+
+	gk20a_dbg_fn("done");
+	return 0;
+
+clean_up:
+	gk20a_err(d, "fail");
+	channel_gk20a_free_inst(g, ch);
+	return err;
+}
+
+static void channel_gk20a_free_inst(struct gk20a *g,
+				struct channel_gk20a *ch)
+{
+	struct device *d = dev_from_gk20a(g);
+
+	if (ch->inst_block.cpuva)
+		dma_free_coherent(d, ch->inst_block.size,
+			ch->inst_block.cpuva, ch->inst_block.iova);
+	ch->inst_block.cpuva = NULL;
+	ch->inst_block.iova = 0;
+	memset(&ch->inst_block, 0, sizeof(struct inst_desc));
+}
+
+static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
+{
+	return gk20a_fifo_update_runlist(c->g, 0, c->hw_chid, add, true);
+}
+
+void gk20a_disable_channel_no_update(struct channel_gk20a *ch)
+{
+	/* ensure no fences are pending */
+	if (ch->sync)
+		ch->sync->set_min_eq_max(ch->sync);
+
+	/* disable channel */
+	gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
+		     gk20a_readl(ch->g,
+		     ccsr_channel_r(ch->hw_chid)) |
+		     ccsr_channel_enable_clr_true_f());
+}
+
+static int gk20a_wait_channel_idle(struct channel_gk20a *ch)
+{
+	bool channel_idle = false;
+	unsigned long end_jiffies = jiffies +
+		msecs_to_jiffies(gk20a_get_gr_idle_timeout(ch->g));
+
+	do {
+		mutex_lock(&ch->jobs_lock);
+		channel_idle = list_empty(&ch->jobs);
+		mutex_unlock(&ch->jobs_lock);
+		if (channel_idle)
+			break;
+
+		usleep_range(1000, 3000);
+	} while (time_before(jiffies, end_jiffies)
+			|| !tegra_platform_is_silicon());
+
+	if (!channel_idle)
+		gk20a_err(dev_from_gk20a(ch->g), "channel jobs not freed");
+
+	return 0;
+}
+
+void gk20a_disable_channel(struct channel_gk20a *ch,
+			   bool finish,
+			   unsigned long finish_timeout)
+{
+	if (finish) {
+		int err = gk20a_channel_finish(ch, finish_timeout);
+		WARN_ON(err);
+	}
+
+	/* disable the channel from hw and increment syncpoints */
+	gk20a_disable_channel_no_update(ch);
+
+	gk20a_wait_channel_idle(ch);
+
+	/* preempt the channel */
+	gk20a_fifo_preempt_channel(ch->g, ch->hw_chid);
+
+	/* remove channel from runlist */
+	channel_gk20a_update_runlist(ch, false);
+}
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+
+static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
+{
+	/* disable existing cyclestats buffer */
+	mutex_lock(&ch->cyclestate.cyclestate_buffer_mutex);
+	if (ch->cyclestate.cyclestate_buffer_handler) {
+		dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
+				ch->cyclestate.cyclestate_buffer);
+		dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
+		ch->cyclestate.cyclestate_buffer_handler = NULL;
+		ch->cyclestate.cyclestate_buffer = NULL;
+		ch->cyclestate.cyclestate_buffer_size = 0;
+	}
+	mutex_unlock(&ch->cyclestate.cyclestate_buffer_mutex);
+}
+
+static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
+		       struct nvhost_cycle_stats_args *args)
+{
+	struct dma_buf *dmabuf;
+	void *virtual_address;
+
+	if (args->nvmap_handle && !ch->cyclestate.cyclestate_buffer_handler) {
+
+		/* set up new cyclestats buffer */
+		dmabuf = dma_buf_get(args->nvmap_handle);
+		if (IS_ERR(dmabuf))
+			return PTR_ERR(dmabuf);
+		virtual_address = dma_buf_vmap(dmabuf);
+		if (!virtual_address)
+			return -ENOMEM;
+
+		ch->cyclestate.cyclestate_buffer_handler = dmabuf;
+		ch->cyclestate.cyclestate_buffer = virtual_address;
+		ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
+		return 0;
+
+	} else if (!args->nvmap_handle &&
+			ch->cyclestate.cyclestate_buffer_handler) {
+		gk20a_free_cycle_stats_buffer(ch);
+		return 0;
+
+	} else if (!args->nvmap_handle &&
+			!ch->cyclestate.cyclestate_buffer_handler) {
+		/* no request from GL */
+		return 0;
+
+	} else {
+		pr_err("channel already has cyclestats buffer\n");
+		return -EINVAL;
+	}
+}
+#endif
+
+static int gk20a_init_error_notifier(struct channel_gk20a *ch,
+		struct nvhost_set_error_notifier *args) {
+	void *va;
+
+	struct dma_buf *dmabuf;
+
+	if (!args->mem) {
+		pr_err("gk20a_init_error_notifier: invalid memory handle\n");
+		return -EINVAL;
+	}
+
+	dmabuf = dma_buf_get(args->mem);
+
+	if (ch->error_notifier_ref)
+		gk20a_free_error_notifiers(ch);
+
+	if (IS_ERR(dmabuf)) {
+		pr_err("Invalid handle: %d\n", args->mem);
+		return -EINVAL;
+	}
+	/* map handle */
+	va = dma_buf_vmap(dmabuf);
+	if (!va) {
+		dma_buf_put(dmabuf);
+		pr_err("Cannot map notifier handle\n");
+		return -ENOMEM;
+	}
+
+	/* set channel notifiers pointer */
+	ch->error_notifier_ref = dmabuf;
+	ch->error_notifier = va + args->offset;
+	ch->error_notifier_va = va;
+	memset(ch->error_notifier, 0, sizeof(struct nvhost_notification));
+	return 0;
+}
+
+void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
+{
+	if (ch->error_notifier_ref) {
+		struct timespec time_data;
+		u64 nsec;
+		getnstimeofday(&time_data);
+		nsec = ((u64)time_data.tv_sec) * 1000000000u +
+				(u64)time_data.tv_nsec;
+		ch->error_notifier->time_stamp.nanoseconds[0] =
+				(u32)nsec;
+		ch->error_notifier->time_stamp.nanoseconds[1] =
+				(u32)(nsec >> 32);
+		ch->error_notifier->info32 = error;
+		ch->error_notifier->status = 0xffff;
+		gk20a_err(dev_from_gk20a(ch->g),
+			  "error notifier set to %d\n", error);
+	}
+}
+
+static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
+{
+	if (ch->error_notifier_ref) {
+		dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
+		dma_buf_put(ch->error_notifier_ref);
+		ch->error_notifier_ref = 0;
+		ch->error_notifier = 0;
+		ch->error_notifier_va = 0;
+	}
+}
+
+void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
+{
+	struct gk20a *g = ch->g;
+	struct device *d = dev_from_gk20a(g);
+	struct fifo_gk20a *f = &g->fifo;
+	struct gr_gk20a *gr = &g->gr;
+	struct vm_gk20a *ch_vm = ch->vm;
+	unsigned long timeout = gk20a_get_gr_idle_timeout(g);
+	struct dbg_session_gk20a *dbg_s;
+
+	gk20a_dbg_fn("");
+
+	/* if engine reset was deferred, perform it now */
+	mutex_lock(&f->deferred_reset_mutex);
+	if (g->fifo.deferred_reset_pending) {
+		gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
+			  " deferred, running now");
+		fifo_gk20a_finish_mmu_fault_handling(g, g->fifo.mmu_fault_engines);
+		g->fifo.mmu_fault_engines = 0;
+		g->fifo.deferred_reset_pending = false;
+	}
+	mutex_unlock(&f->deferred_reset_mutex);
+
+	if (!ch->bound)
+		return;
+
+	if (!gk20a_channel_as_bound(ch))
+		goto unbind;
+
+	gk20a_dbg_info("freeing bound channel context, timeout=%ld",
+			timeout);
+
+	gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout);
+
+	gk20a_free_error_notifiers(ch);
+
+	/* release channel ctx */
+	gk20a_free_channel_ctx(ch);
+
+	gk20a_gr_flush_channel_tlb(gr);
+
+	memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
+
+	/* free gpfifo */
+	if (ch->gpfifo.gpu_va)
+		gk20a_gmmu_unmap(ch_vm, ch->gpfifo.gpu_va,
+			ch->gpfifo.size, gk20a_mem_flag_none);
+	if (ch->gpfifo.cpu_va)
+		dma_free_coherent(d, ch->gpfifo.size,
+			ch->gpfifo.cpu_va, ch->gpfifo.iova);
+	ch->gpfifo.cpu_va = NULL;
+	ch->gpfifo.iova = 0;
+
+	gk20a_mm_l2_invalidate(ch->g);
+
+	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	gk20a_free_cycle_stats_buffer(ch);
+#endif
+
+	channel_gk20a_free_priv_cmdbuf(ch);
+
+	if (ch->sync) {
+		ch->sync->destroy(ch->sync);
+		ch->sync = NULL;
+	}
+
+	/* release channel binding to the as_share */
+	gk20a_as_release_share(ch_vm->as_share);
+
+unbind:
+	channel_gk20a_unbind(ch);
+	channel_gk20a_free_inst(g, ch);
+
+	ch->vpr = false;
+	ch->vm = NULL;
+	WARN_ON(ch->sync);
+
+	/* unlink all debug sessions */
+	mutex_lock(&ch->dbg_s_lock);
+
+	list_for_each_entry(dbg_s, &ch->dbg_s_list, dbg_s_list_node) {
+		dbg_s->ch = NULL;
+		list_del_init(&dbg_s->dbg_s_list_node);
+	}
+
+	mutex_unlock(&ch->dbg_s_lock);
+
+	/* ALWAYS last */
+	release_used_channel(f, ch);
+}
+
+int gk20a_channel_release(struct inode *inode, struct file *filp)
+{
+	struct channel_gk20a *ch = (struct channel_gk20a *)filp->private_data;
+	struct gk20a *g = ch->g;
+
+	trace_gk20a_channel_release(dev_name(&g->dev->dev));
+
+	gk20a_channel_busy(ch->g->dev);
+	gk20a_free_channel(ch, true);
+	gk20a_channel_idle(ch->g->dev);
+
+	gk20a_put_client(g);
+	filp->private_data = NULL;
+	return 0;
+}
+
+static struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	struct channel_gk20a *ch;
+
+	ch = acquire_unused_channel(f);
+	if (ch == NULL) {
+		/* TBD: we want to make this virtualizable */
+		gk20a_err(dev_from_gk20a(g), "out of hw chids");
+		return 0;
+	}
+
+	ch->g = g;
+
+	if (channel_gk20a_alloc_inst(g, ch)) {
+		ch->in_use = false;
+		gk20a_err(dev_from_gk20a(g),
+			   "failed to open gk20a channel, out of inst mem");
+
+		return 0;
+	}
+	g->ops.fifo.bind_channel(ch);
+	ch->pid = current->pid;
+
+	/* reset timeout counter and update timestamp */
+	ch->timeout_accumulated_ms = 0;
+	ch->timeout_gpfifo_get = 0;
+	/* set gr host default timeout */
+	ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
+	ch->timeout_debug_dump = true;
+	ch->has_timedout = false;
+
+	/* The channel is *not* runnable at this point. It still needs to have
+	 * an address space bound and allocate a gpfifo and grctx. */
+
+	init_waitqueue_head(&ch->notifier_wq);
+	init_waitqueue_head(&ch->semaphore_wq);
+	init_waitqueue_head(&ch->submit_wq);
+
+	return ch;
+}
+
+static int __gk20a_channel_open(struct gk20a *g, struct file *filp)
+{
+	int err;
+	struct channel_gk20a *ch;
+
+	trace_gk20a_channel_open(dev_name(&g->dev->dev));
+
+	err = gk20a_get_client(g);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to get client ref");
+		return err;
+	}
+
+	err = gk20a_channel_busy(g->dev);
+	if (err) {
+		gk20a_put_client(g);
+		gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
+		return err;
+	}
+	ch = gk20a_open_new_channel(g);
+	gk20a_channel_idle(g->dev);
+	if (!ch) {
+		gk20a_put_client(g);
+		gk20a_err(dev_from_gk20a(g),
+			"failed to get f");
+		return -ENOMEM;
+	}
+
+	filp->private_data = ch;
+	return 0;
+}
+
+int gk20a_channel_open(struct inode *inode, struct file *filp)
+{
+	struct gk20a *g = container_of(inode->i_cdev,
+			struct gk20a, channel.cdev);
+	return __gk20a_channel_open(g, filp);
+}
+
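As the comment in `gk20a_open_new_channel` notes, a freshly opened channel is not yet runnable: an address space must still be bound and a gpfifo allocated. A rough in-file sketch of that ordering (illustrative only, not part of the driver; the two callees are the real functions from this file and error handling is elided):

```c
/* Illustrative wrapper, assuming this file's context. */
static int example_make_channel_runnable(struct gk20a *g,
					 struct nvhost_alloc_gpfifo_args *args)
{
	/* 1. acquire a hw chid, inst block, and CCSR binding */
	struct channel_gk20a *ch = gk20a_open_new_channel(g);

	if (!ch)
		return -ENOMEM;
	/* 2. bind an address space via the as_share path, so that
	 *    gk20a_channel_as_bound(ch) becomes true */
	/* 3. allocate the gpfifo; this also programs RAMFC and USERD
	 *    and adds the channel to the runlist */
	return gk20a_alloc_channel_gpfifo(ch, args);
}
```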
+/* allocate private cmd buffer.
+   used for inserting commands before/after user submitted buffers. */
+static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
+{
+	struct device *d = dev_from_gk20a(c->g);
+	struct vm_gk20a *ch_vm = c->vm;
+	struct priv_cmd_queue *q = &c->priv_cmd_q;
+	struct priv_cmd_entry *e;
+	u32 i = 0, size;
+	int err = 0;
+	struct sg_table *sgt;
+	dma_addr_t iova;
+
+	/* Kernel can insert gpfifos before and after user gpfifos.
+	   Before user gpfifos, kernel inserts fence_wait, which takes
+	   syncpoint_a (2 dwords) + syncpoint_b (2 dwords) = 4 dwords.
+	   After user gpfifos, kernel inserts fence_get, which takes
+	   wfi (2 dwords) + syncpoint_a (2 dwords) + syncpoint_b (2 dwords)
+	   = 6 dwords.
+	   Worst case, if the kernel adds both of them for every user gpfifo,
+	   the max size of the priv_cmdbuf is:
+	   (gpfifo entry number * (2 / 3) * (4 + 6) * 4 bytes) */
+	size = roundup_pow_of_two(
+		c->gpfifo.entry_num * 2 * 10 * sizeof(u32) / 3);
+
+	q->mem.base_cpuva = dma_alloc_coherent(d, size,
+					&iova,
+					GFP_KERNEL);
+	if (!q->mem.base_cpuva) {
+		gk20a_err(d, "%s: memory allocation failed\n", __func__);
+		err = -ENOMEM;
+		goto clean_up;
+	}
+
+	q->mem.base_iova = iova;
+	q->mem.size = size;
+
+	err = gk20a_get_sgtable(d, &sgt,
+			q->mem.base_cpuva, q->mem.base_iova, size);
+	if (err) {
+		gk20a_err(d, "%s: failed to create sg table\n", __func__);
+		goto clean_up;
+	}
+
+	memset(q->mem.base_cpuva, 0, size);
+
+	q->base_gpuva = gk20a_gmmu_map(ch_vm, &sgt,
+					size,
+					0, /* flags */
+					gk20a_mem_flag_none);
+	if (!q->base_gpuva) {
+		gk20a_err(d, "ch %d : failed to map gpu va "
+			   "for priv cmd buffer", c->hw_chid);
+		err = -ENOMEM;
+		goto clean_up_sgt;
+	}
+
+	q->size = q->mem.size / sizeof(u32);
+
+	INIT_LIST_HEAD(&q->head);
+	INIT_LIST_HEAD(&q->free);
+
+	/* pre-alloc 25% of priv cmdbuf entries and put them on free list */
+	for (i = 0; i < q->size / 4; i++) {
+		e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
+		if (!e) {
+			gk20a_err(d, "ch %d: fail to pre-alloc cmd entry",
+				c->hw_chid);
+			err = -ENOMEM;
+			goto clean_up_sgt;
+		}
+		e->pre_alloc = true;
+		list_add(&e->list, &q->free);
+	}
+
+	gk20a_free_sgtable(&sgt);
+
+	return 0;
+
+clean_up_sgt:
+	gk20a_free_sgtable(&sgt);
+clean_up:
+	channel_gk20a_free_priv_cmdbuf(c);
+	return err;
+}
+
+static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
+{
+	struct device *d = dev_from_gk20a(c->g);
+	struct vm_gk20a *ch_vm = c->vm;
+	struct priv_cmd_queue *q = &c->priv_cmd_q;
+	struct priv_cmd_entry *e;
+	struct list_head *pos, *tmp, *head;
+
+	if (q->size == 0)
+		return;
+
+	if (q->base_gpuva)
+		gk20a_gmmu_unmap(ch_vm, q->base_gpuva,
+				q->mem.size, gk20a_mem_flag_none);
+	if (q->mem.base_cpuva)
+		dma_free_coherent(d, q->mem.size,
+			q->mem.base_cpuva, q->mem.base_iova);
+	q->mem.base_cpuva = NULL;
+	q->mem.base_iova = 0;
+
+	/* free used list */
+	head = &q->head;
+	list_for_each_safe(pos, tmp, head) {
+		e = container_of(pos, struct priv_cmd_entry, list);
+		free_priv_cmdbuf(c, e);
+	}
+
+	/* free free list */
+	head = &q->free;
+	list_for_each_safe(pos, tmp, head) {
+		e = container_of(pos, struct priv_cmd_entry, list);
+		e->pre_alloc = false;
+		free_priv_cmdbuf(c, e);
+	}
+
+	memset(q, 0, sizeof(struct priv_cmd_queue));
+}
+
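Concretely, for a (tripled) gpfifo of 1024 entries, the worst-case sizing above works out to roundup_pow_of_two(1024 * 2 * 10 * 4 / 3) = roundup_pow_of_two(27306) = 32768 bytes. A standalone check (`rup2` is a plain reimplementation of the kernel's roundup_pow_of_two for illustration):

```c
#include <stdint.h>
#include <stdio.h>

/* illustration of roundup_pow_of_two() for 32-bit values */
static uint32_t rup2(uint32_t v)
{
	uint32_t p = 1;

	while (p < v)
		p <<= 1;
	return p;
}

int main(void)
{
	uint32_t entry_num = 1024;	/* tripled gpfifo entry count */
	uint32_t size = rup2(entry_num * 2 * 10 * sizeof(uint32_t) / 3);

	printf("priv cmdbuf size = %u bytes\n", size);	/* prints 32768 */
	return 0;
}
```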
+/* allocate a cmd buffer with given size. size is number of u32 entries */
+int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
+			     struct priv_cmd_entry **entry)
+{
+	struct priv_cmd_queue *q = &c->priv_cmd_q;
+	struct priv_cmd_entry *e;
+	struct list_head *node;
+	u32 free_count;
+	u32 size = orig_size;
+	bool no_retry = false;
+
+	gk20a_dbg_fn("size %d", orig_size);
+
+	*entry = NULL;
+
+	/* if free space in the end is less than requested, increase the size
+	 * to make the real allocated space start from beginning. */
+	if (q->put + size > q->size)
+		size = orig_size + (q->size - q->put);
+
+	gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
+			c->hw_chid, q->get, q->put);
+
+TRY_AGAIN:
+	free_count = (q->size - (q->put - q->get) - 1) % q->size;
+
+	if (size > free_count) {
+		if (!no_retry) {
+			recycle_priv_cmdbuf(c);
+			no_retry = true;
+			goto TRY_AGAIN;
+		} else
+			return -EAGAIN;
+	}
+
+	if (unlikely(list_empty(&q->free))) {
+
+		gk20a_dbg_info("ch %d: run out of pre-alloc entries",
+			c->hw_chid);
+
+		e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
+		if (!e) {
+			gk20a_err(dev_from_gk20a(c->g),
+				"ch %d: fail to allocate priv cmd entry",
+				c->hw_chid);
+			return -ENOMEM;
+		}
+	} else {
+		node = q->free.next;
+		list_del(node);
+		e = container_of(node, struct priv_cmd_entry, list);
+	}
+
+	e->size = orig_size;
+	e->gp_get = c->gpfifo.get;
+	e->gp_put = c->gpfifo.put;
+	e->gp_wrap = c->gpfifo.wrap;
+
+	/* if we have increased size to skip free space in the end, set put
+	   to beginning of cmd buffer (0) + size */
+	if (size != orig_size) {
+		e->ptr = q->mem.base_cpuva;
+		e->gva = q->base_gpuva;
+		q->put = orig_size;
+	} else {
+		e->ptr = q->mem.base_cpuva + q->put;
+		e->gva = q->base_gpuva + q->put * sizeof(u32);
+		q->put = (q->put + orig_size) & (q->size - 1);
+	}
+
+	/* we already handled q->put + size > q->size so BUG_ON this */
+	BUG_ON(q->put > q->size);
+
+	/* add new entry to head since we free from head */
+	list_add(&e->list, &q->head);
+
+	*entry = e;
+
+	gk20a_dbg_fn("done");
+
+	return 0;
+}
+
+/* Don't call this to free an explicit cmd entry.
+ * It doesn't update priv_cmd_queue get/put */
+static void free_priv_cmdbuf(struct channel_gk20a *c,
+			     struct priv_cmd_entry *e)
+{
+	struct priv_cmd_queue *q = &c->priv_cmd_q;
+
+	if (!e)
+		return;
+
+	list_del(&e->list);
+
+	if (unlikely(!e->pre_alloc))
+		kfree(e);
+	else {
+		memset(e, 0, sizeof(struct priv_cmd_entry));
+		e->pre_alloc = true;
+		list_add(&e->list, &q->free);
+	}
+}
+
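The free-space computation `(q->size - (q->put - q->get) - 1) % q->size` is the classic one-slot-open ring-buffer formula: one slot stays reserved so that put == get unambiguously means "empty" rather than "full". The same expression reappears later as `gp_free_count()` for the gpfifo itself. A standalone check:

```c
#include <assert.h>
#include <stdint.h>

/* same formula as gk20a_channel_alloc_priv_cmdbuf() and gp_free_count() */
static uint32_t free_count(uint32_t size, uint32_t put, uint32_t get)
{
	/* one slot is kept open, so an empty ring reports size - 1 */
	return (size - (put - get) - 1) % size;
}

int main(void)
{
	assert(free_count(8, 0, 0) == 7);	/* empty: size - 1 usable */
	assert(free_count(8, 5, 2) == 4);	/* 3 slots used, 4 free */
	assert(free_count(8, 2, 3) == 0);	/* full: put just behind get */
	return 0;
}
```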
+/* free entries if they're no longer being used */
+static void recycle_priv_cmdbuf(struct channel_gk20a *c)
+{
+	struct priv_cmd_queue *q = &c->priv_cmd_q;
+	struct priv_cmd_entry *e, *tmp;
+	struct list_head *head = &q->head;
+	bool wrap_around, found = false;
+
+	gk20a_dbg_fn("");
+
+	/* Find the most recent free entry. Free it and everything before it. */
+	list_for_each_entry(e, head, list) {
+
+		gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
+			"curr get:put:wrap %d:%d:%d",
+			c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
+			c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
+
+		wrap_around = (c->gpfifo.wrap != e->gp_wrap);
+		if (e->gp_get < e->gp_put) {
+			if (c->gpfifo.get >= e->gp_put ||
+			    wrap_around) {
+				found = true;
+				break;
+			} else
+				e->gp_get = c->gpfifo.get;
+		} else if (e->gp_get > e->gp_put) {
+			if (wrap_around &&
+			    c->gpfifo.get >= e->gp_put) {
+				found = true;
+				break;
+			} else
+				e->gp_get = c->gpfifo.get;
+		}
+	}
+
+	if (found)
+		q->get = (e->ptr - q->mem.base_cpuva) + e->size;
+	else {
+		gk20a_dbg_info("no free entry recycled");
+		return;
+	}
+
+	list_for_each_entry_safe_continue(e, tmp, head, list) {
+		free_priv_cmdbuf(c, e);
+	}
+
+	gk20a_dbg_fn("done");
+}
+
+
+static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
+			       struct nvhost_alloc_gpfifo_args *args)
+{
+	struct gk20a *g = c->g;
+	struct device *d = dev_from_gk20a(g);
+	struct vm_gk20a *ch_vm;
+	u32 gpfifo_size;
+	int err = 0;
+	struct sg_table *sgt;
+	dma_addr_t iova;
+
+	/* Kernel can insert one extra gpfifo entry before user submitted gpfifos
+	   and another one after, for internal usage. Triple the requested size. */
+	gpfifo_size = roundup_pow_of_two(args->num_entries * 3);
+
+	if (args->flags & NVHOST_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
+		c->vpr = true;
+
+	/* an address space needs to have been bound at this point. */
+	if (!gk20a_channel_as_bound(c)) {
+		gk20a_err(d,
+			"not bound to an address space at time of gpfifo"
+			" allocation. Attempting to create and bind to"
+			" one...");
+		return -EINVAL;
+	}
+	ch_vm = c->vm;
+
+	c->cmds_pending = false;
+	c->last_submit_fence.valid = false;
+
+	c->ramfc.offset = 0;
+	c->ramfc.size = ram_in_ramfc_s() / 8;
+
+	if (c->gpfifo.cpu_va) {
+		gk20a_err(d, "channel %d :"
+			   "gpfifo already allocated", c->hw_chid);
+		return -EEXIST;
+	}
+
+	c->gpfifo.size = gpfifo_size * sizeof(struct gpfifo);
+	c->gpfifo.cpu_va = (struct gpfifo *)dma_alloc_coherent(d,
+						c->gpfifo.size,
+						&iova,
+						GFP_KERNEL);
+	if (!c->gpfifo.cpu_va) {
+		gk20a_err(d, "%s: memory allocation failed\n", __func__);
+		err = -ENOMEM;
+		goto clean_up;
+	}
+
+	c->gpfifo.iova = iova;
+	c->gpfifo.entry_num = gpfifo_size;
+
+	c->gpfifo.get = c->gpfifo.put = 0;
+
+	err = gk20a_get_sgtable(d, &sgt,
+			c->gpfifo.cpu_va, c->gpfifo.iova, c->gpfifo.size);
+	if (err) {
+		gk20a_err(d, "%s: failed to allocate sg table\n", __func__);
+		goto clean_up;
+	}
+
+	c->gpfifo.gpu_va = gk20a_gmmu_map(ch_vm,
+					&sgt,
+					c->gpfifo.size,
+					0, /* flags */
+					gk20a_mem_flag_none);
+	if (!c->gpfifo.gpu_va) {
+		gk20a_err(d, "channel %d : failed to map"
+			   " gpu_va for gpfifo", c->hw_chid);
+		err = -ENOMEM;
+		goto clean_up_sgt;
+	}
+
+	gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
+		c->hw_chid, c->gpfifo.gpu_va, c->gpfifo.entry_num);
+
+	channel_gk20a_setup_ramfc(c, c->gpfifo.gpu_va, c->gpfifo.entry_num);
+
+	channel_gk20a_setup_userd(c);
+	channel_gk20a_commit_userd(c);
+
+	gk20a_mm_l2_invalidate(c->g);
+
+	/* TBD: setup engine contexts */
+
+	err = channel_gk20a_alloc_priv_cmdbuf(c);
+	if (err)
+		goto clean_up_unmap;
+
+	err = channel_gk20a_update_runlist(c, true);
+	if (err)
+		goto clean_up_unmap;
+
+	gk20a_free_sgtable(&sgt);
+
+	gk20a_dbg_fn("done");
+	return 0;
+
+clean_up_unmap:
+	gk20a_gmmu_unmap(ch_vm, c->gpfifo.gpu_va,
+		c->gpfifo.size, gk20a_mem_flag_none);
+clean_up_sgt:
+	gk20a_free_sgtable(&sgt);
+clean_up:
+	dma_free_coherent(d, c->gpfifo.size,
+		c->gpfifo.cpu_va, c->gpfifo.iova);
+	c->gpfifo.cpu_va = NULL;
+	c->gpfifo.iova = 0;
+	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
+	gk20a_err(d, "fail");
+	return err;
+}
+
+static inline int wfi_cmd_size(void)
+{
+	return 2;
+}
+
+void add_wfi_cmd(struct priv_cmd_entry *cmd, int *i)
+{
+	/* wfi */
+	cmd->ptr[(*i)++] = 0x2001001E;
+	/* handle, ignored */
+	cmd->ptr[(*i)++] = 0x00000000;
+}
+
+static inline bool check_gp_put(struct gk20a *g,
+				struct channel_gk20a *c)
+{
+	u32 put;
+	/* gp_put changed unexpectedly since last update? */
+	put = gk20a_bar1_readl(g,
+		c->userd_gpu_va + 4 * ram_userd_gp_put_w());
+	if (c->gpfifo.put != put) {
+		/*TBD: BUG_ON/teardown on this*/
+		gk20a_err(dev_from_gk20a(g), "gp_put changed unexpectedly "
+			  "since last update");
+		c->gpfifo.put = put;
+		return false; /* surprise! */
+	}
+	return true; /* checked out ok */
+}
+
+/* Update with this periodically to determine how the gpfifo is draining. */
+static inline u32 update_gp_get(struct gk20a *g,
+				struct channel_gk20a *c)
+{
+	u32 new_get = gk20a_bar1_readl(g,
+		c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
+	if (new_get < c->gpfifo.get)
+		c->gpfifo.wrap = !c->gpfifo.wrap;
+	c->gpfifo.get = new_get;
+	return new_get;
+}
+
+static inline u32 gp_free_count(struct channel_gk20a *c)
+{
+	return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
+		c->gpfifo.entry_num;
+}
+
+bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
+		u32 timeout_delta_ms)
+{
+	u32 gpfifo_get = update_gp_get(ch->g, ch);
+	/* Count consecutive timeout ISRs */
+	if (gpfifo_get == ch->timeout_gpfifo_get) {
+		/* we didn't advance since previous channel timeout check */
+		ch->timeout_accumulated_ms += timeout_delta_ms;
+	} else {
+		/* first timeout isr encountered */
+		ch->timeout_accumulated_ms = timeout_delta_ms;
+	}
+
+	ch->timeout_gpfifo_get = gpfifo_get;
+
+	return ch->g->timeouts_enabled &&
+		ch->timeout_accumulated_ms > ch->timeout_ms_max;
+}
+
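`gk20a_channel_update_and_check_timeout` only accumulates time while the gpfifo GET pointer is stuck; any progress restarts the count. A small standalone simulation of that policy (the struct is a stand-in for the relevant channel fields):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct timeout_state {	/* stand-in for the channel fields used above */
	uint32_t last_get;
	uint32_t accumulated_ms;
	uint32_t max_ms;
};

/* same accumulate-or-reset policy as the driver function */
static bool check_timeout(struct timeout_state *t, uint32_t get,
			  uint32_t delta_ms)
{
	if (get == t->last_get)
		t->accumulated_ms += delta_ms;	/* no progress: accumulate */
	else
		t->accumulated_ms = delta_ms;	/* progress: restart count */
	t->last_get = get;
	return t->accumulated_ms > t->max_ms;
}

int main(void)
{
	struct timeout_state t = { .max_ms = 3000 };

	printf("%d\n", check_timeout(&t, 10, 2000));	/* 0: 2000 ms stuck */
	printf("%d\n", check_timeout(&t, 11, 2000));	/* 0: GET moved, reset */
	printf("%d\n", check_timeout(&t, 11, 2000));	/* 1: 4000 ms stuck */
	return 0;
}
```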
+/* Issue a syncpoint increment *preceded* by a wait-for-idle
+ * command.  All commands on the channel will have been
+ * consumed at the time the fence syncpoint increment occurs.
+ */
+static int gk20a_channel_submit_wfi(struct channel_gk20a *c)
+{
+	struct priv_cmd_entry *cmd = NULL;
+	struct gk20a *g = c->g;
+	u32 free_count;
+	int err;
+
+	if (c->has_timedout)
+		return -ETIMEDOUT;
+
+	if (!c->sync) {
+		c->sync = gk20a_channel_sync_create(c);
+		if (!c->sync)
+			return -ENOMEM;
+	}
+
+	update_gp_get(g, c);
+	free_count = gp_free_count(c);
+	if (unlikely(!free_count)) {
+		gk20a_err(dev_from_gk20a(g),
+			   "not enough gpfifo space");
+		return -EAGAIN;
+	}
+
+	err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence);
+	if (unlikely(err))
+		return err;
+
+	WARN_ON(!c->last_submit_fence.wfi);
+
+	c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva);
+	c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) |
+		pbdma_gp_entry1_length_f(cmd->size);
+
+	c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
+
+	/* save gp_put */
+	cmd->gp_put = c->gpfifo.put;
+
+	gk20a_bar1_writel(g,
+		c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
+		c->gpfifo.put);
+
+	gk20a_dbg_info("post-submit put %d, get %d, size %d",
+		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
+
+	return 0;
+}
+
+static u32 get_gp_free_count(struct channel_gk20a *c)
+{
+	update_gp_get(c->g, c);
+	return gp_free_count(c);
+}
+
+static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g)
+{
+	void *mem = NULL;
+	unsigned int words;
+	u64 offset;
+	struct dma_buf *dmabuf = NULL;
+
+	if (gk20a_debug_trace_cmdbuf) {
+		u64 gpu_va = (u64)g->entry0 |
+			(u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
+		int err;
+
+		words = pbdma_gp_entry1_length_v(g->entry1);
+		err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
+		if (!err)
+			mem = dma_buf_vmap(dmabuf);
+	}
+
+	if (mem) {
+		u32 i;
+		/*
+		 * Write in batches of 128 as there seems to be a limit
+		 * of how much you can output to ftrace at once.
+		 */
+		for (i = 0; i < words; i += 128U) {
+			trace_gk20a_push_cmdbuf(
+				c->g->dev->name,
+				0,
+				min(words - i, 128U),
+				offset + i * sizeof(u32),
+				mem);
+		}
+		dma_buf_vunmap(dmabuf, mem);
+	}
+}
+
+static int gk20a_channel_add_job(struct channel_gk20a *c,
+				 struct gk20a_channel_fence *fence)
+{
+	struct vm_gk20a *vm = c->vm;
+	struct channel_gk20a_job *job = NULL;
+	struct mapped_buffer_node **mapped_buffers = NULL;
+	int err = 0, num_mapped_buffers;
+
+	/* job needs reference to this vm */
+	gk20a_vm_get(vm);
+
+	err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers);
+	if (err) {
+		gk20a_vm_put(vm);
+		return err;
+	}
+
+	job = kzalloc(sizeof(*job), GFP_KERNEL);
+	if (!job) {
+		gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
+		gk20a_vm_put(vm);
+		return -ENOMEM;
+	}
+
+	job->num_mapped_buffers = num_mapped_buffers;
+	job->mapped_buffers = mapped_buffers;
+	job->fence = *fence;
+
+	mutex_lock(&c->jobs_lock);
+	list_add_tail(&job->list, &c->jobs);
+	mutex_unlock(&c->jobs_lock);
+
+	return 0;
+}
+
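Each gpfifo entry is two 32-bit words: entry0 takes the low half of the pushbuffer GPU VA, entry1 takes the high VA bits ORed with the length field, as in `gk20a_channel_submit_wfi` above. Assuming the length sits at bit 10 of entry1 (the "cmd buf va high 32 | words << 10" comment in the submit path below; the real driver uses the generated `pbdma_gp_entry1_length_f()` accessor), packing looks like:

```c
#include <stdint.h>
#include <stdio.h>

/* assumed field position; the driver uses pbdma_gp_entry1_length_f() */
#define GP_ENTRY1_LENGTH(words)	((uint32_t)(words) << 10)

struct gpfifo_entry {
	uint32_t entry0;	/* pushbuffer GPU VA, low 32 bits */
	uint32_t entry1;	/* VA high bits | length in words */
};

int main(void)
{
	uint64_t gva = 0x1c0010000ULL;	/* example pushbuffer GPU VA */
	uint32_t words = 6;		/* e.g. wfi + syncpt incr pair */
	struct gpfifo_entry e = {
		.entry0 = (uint32_t)gva,
		.entry1 = (uint32_t)(gva >> 32) | GP_ENTRY1_LENGTH(words),
	};

	printf("entry0=0x%08x entry1=0x%08x\n", e.entry0, e.entry1);
	return 0;
}
```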
+void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
+{
+	struct gk20a *g = c->g;
+	struct vm_gk20a *vm = c->vm;
+	struct channel_gk20a_job *job, *n;
+	int i;
+
+	wake_up(&c->submit_wq);
+
+	mutex_lock(&c->jobs_lock);
+	list_for_each_entry_safe(job, n, &c->jobs, list) {
+		bool completed = WARN_ON(!c->sync) ||
+			c->sync->is_expired(c->sync, &job->fence);
+		if (!completed)
+			break;
+
+		gk20a_vm_put_buffers(vm, job->mapped_buffers,
+				job->num_mapped_buffers);
+
+		/* job is done. release its reference to vm */
+		gk20a_vm_put(vm);
+
+		list_del_init(&job->list);
+		kfree(job);
+		gk20a_channel_idle(g->dev);
+	}
+	mutex_unlock(&c->jobs_lock);
+
+	for (i = 0; i < nr_completed; i++)
+		gk20a_channel_idle(c->g->dev);
+}
+
+static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
+				struct nvhost_gpfifo *gpfifo,
+				u32 num_entries,
+				struct nvhost_fence *fence,
+				u32 flags)
+{
+	struct gk20a *g = c->g;
+	struct device *d = dev_from_gk20a(g);
+	int err = 0;
+	int i;
+	struct priv_cmd_entry *wait_cmd = NULL;
+	struct priv_cmd_entry *incr_cmd = NULL;
+	/* we might need two extra gpfifo entries - one for pre fence
+	 * and one for post fence. */
+	const int extra_entries = 2;
+
+	if (c->has_timedout)
+		return -ETIMEDOUT;
+
+	if ((flags & (NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
+		      NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
+	    !fence)
+		return -EINVAL;
+
+	if (!c->sync) {
+		c->sync = gk20a_channel_sync_create(c);
+		if (!c->sync)
+			return -ENOMEM;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	/* update debug settings */
+	if (g->ops.ltc.sync_debugfs)
+		g->ops.ltc.sync_debugfs(g);
+#endif
+
+	gk20a_dbg_info("channel %d", c->hw_chid);
+
+	/* gk20a_channel_update releases this ref. */
+	gk20a_channel_busy(g->dev);
+
+	trace_gk20a_channel_submit_gpfifo(c->g->dev->name,
+					  c->hw_chid,
+					  num_entries,
+					  flags,
+					  fence->syncpt_id, fence->value);
+	check_gp_put(g, c);
+	update_gp_get(g, c);
+
+	gk20a_dbg_info("pre-submit put %d, get %d, size %d",
+		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
+
+	/* Invalidate tlb if it's dirty...                                   */
+	/* TBD: this should be done in the cmd stream, not with PRIs.        */
+	/* We don't know what context is currently running...                */
+	/* Note also: there can be more than one context associated with the */
+	/* address space (vm).                                               */
+	gk20a_mm_tlb_invalidate(c->vm);
+
+	/* Make sure we have enough space for gpfifo entries. If not,
+	 * wait for signals from completed submits */
+	if (gp_free_count(c) < num_entries + extra_entries) {
+		err = wait_event_interruptible(c->submit_wq,
+			get_gp_free_count(c) >= num_entries + extra_entries ||
+			c->has_timedout);
+	}
+
+	if (c->has_timedout) {
+		err = -ETIMEDOUT;
+		goto clean_up;
+	}
+
+	if (err) {
+		gk20a_err(d, "not enough gpfifo space");
+		err = -EAGAIN;
+		goto clean_up;
+	}
+
+	/*
+	 * optionally insert syncpt wait in the beginning of gpfifo submission
+	 * when user requested and the wait hasn't expired.
+	 * validate that the id makes sense, elide if not.
+	 * the only reason this isn't being unceremoniously killed is to
+	 * keep running some tests which trigger this condition.
+	 */
+	if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
+		if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
+			err = c->sync->wait_fd(c->sync, fence->syncpt_id,
+					&wait_cmd);
+		else
+			err = c->sync->wait_syncpt(c->sync, fence->syncpt_id,
+					fence->value, &wait_cmd);
+	}
+	if (err)
+		goto clean_up;
+
+
+	/* always insert syncpt increment at end of gpfifo submission
+	   to keep track of method completion for idle railgating */
+	if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET &&
+	    flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
+		err = c->sync->incr_user_fd(c->sync, &incr_cmd,
+					    &c->last_submit_fence,
+					    &fence->syncpt_id);
+	else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
+		err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
+						&c->last_submit_fence,
+						&fence->syncpt_id,
+						&fence->value);
+	else
+		err = c->sync->incr(c->sync, &incr_cmd,
+				    &c->last_submit_fence);
+	if (err)
+		goto clean_up;
+
+	if (wait_cmd) {
+		c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
+			u64_lo32(wait_cmd->gva);
+		c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
+			u64_hi32(wait_cmd->gva) |
+			pbdma_gp_entry1_length_f(wait_cmd->size);
+		trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
+
+		c->gpfifo.put = (c->gpfifo.put + 1) &
+			(c->gpfifo.entry_num - 1);
+
+		/* save gp_put */
+		wait_cmd->gp_put = c->gpfifo.put;
+	}
+
+	for (i = 0; i < num_entries; i++) {
+		c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
+			gpfifo[i].entry0; /* cmd buf va low 32 */
+		c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
+			gpfifo[i].entry1; /* cmd buf va high 32 | words << 10 */
+		trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
+		c->gpfifo.put = (c->gpfifo.put + 1) &
+			(c->gpfifo.entry_num - 1);
+	}
+
+	if (incr_cmd) {
+		c->gpfifo.cpu_va[c->gpfifo.put].entry0 =
+			u64_lo32(incr_cmd->gva);
+		c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
+			u64_hi32(incr_cmd->gva) |
+			pbdma_gp_entry1_length_f(incr_cmd->size);
+		trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
+
+		c->gpfifo.put = (c->gpfifo.put + 1) &
+			(c->gpfifo.entry_num - 1);
+
+		/* save gp_put */
+		incr_cmd->gp_put = c->gpfifo.put;
+	}
+
+	/* Invalidate tlb if it's dirty...                                   */
+	/* TBD: this should be done in the cmd stream, not with PRIs.        */
+	/* We don't know what context is currently running...                */
+	/* Note also: there can be more than one context associated with the */
+	/* address space (vm).                                               */
+	gk20a_mm_tlb_invalidate(c->vm);
+
+	trace_gk20a_channel_submitted_gpfifo(c->g->dev->name,
+					     c->hw_chid,
+					     num_entries,
+					     flags,
+					     fence->syncpt_id, fence->value);
+
+	/* TODO! Check for errors... */
+	gk20a_channel_add_job(c, &c->last_submit_fence);
+
+	c->cmds_pending = true;
+	gk20a_bar1_writel(g,
+		c->userd_gpu_va + 4 * ram_userd_gp_put_w(),
+		c->gpfifo.put);
+
+	gk20a_dbg_info("post-submit put %d, get %d, size %d",
+		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
+
+	gk20a_dbg_fn("done");
+	return err;
+
+clean_up:
+	gk20a_err(d, "fail");
+	free_priv_cmdbuf(c, wait_cmd);
+	free_priv_cmdbuf(c, incr_cmd);
+	gk20a_channel_idle(g->dev);
+	return err;
+}
+
+void gk20a_remove_channel_support(struct channel_gk20a *c)
+{
+
+}
+
+int gk20a_init_channel_support(struct gk20a *g, u32 chid)
+{
+	struct channel_gk20a *c = g->fifo.channel+chid;
+	c->g = g;
+	c->in_use = false;
+	c->hw_chid = chid;
+	c->bound = false;
+	c->remove_support = gk20a_remove_channel_support;
+	mutex_init(&c->jobs_lock);
+	INIT_LIST_HEAD(&c->jobs);
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
+#endif
+	INIT_LIST_HEAD(&c->dbg_s_list);
+	mutex_init(&c->dbg_s_lock);
+
+	return 0;
+}
+
+int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout)
+{
+	int err = 0;
+
+	if (!ch->cmds_pending)
+		return 0;
+
+	/* Do not wait for a timedout channel */
+	if (ch->has_timedout)
+		return -ETIMEDOUT;
+
+	if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) {
+		gk20a_dbg_fn("issuing wfi, incr to finish the channel");
+		err = gk20a_channel_submit_wfi(ch);
+	}
+	if (err)
+		return err;
+
+	BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi));
+
+	gk20a_dbg_fn("waiting for channel to finish thresh:%d",
+		      ch->last_submit_fence.thresh);
+
+	err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence, timeout);
+	if (WARN_ON(err))
+		dev_warn(dev_from_gk20a(ch->g),
+			 "timed out waiting for gk20a channel to finish");
+	else
+		ch->cmds_pending = false;
+
+	return err;
+}
+
+static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
+					ulong id, u32 offset,
+					u32 payload, long timeout)
+{
+	struct platform_device *pdev = ch->g->dev;
+	struct dma_buf *dmabuf;
+	void *data;
+	u32 *semaphore;
+	int ret = 0;
+	long remain;
+
+	/* do not wait if channel has timed out */
+	if (ch->has_timedout)
+		return -ETIMEDOUT;
+
+	dmabuf = dma_buf_get(id);
+	if (IS_ERR(dmabuf)) {
+		gk20a_err(&pdev->dev, "invalid notifier nvmap handle 0x%lx",
+			   id);
+		return -EINVAL;
+	}
+
+	data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
+	if (!data) {
+		gk20a_err(&pdev->dev, "failed to map notifier memory");
+		ret = -EINVAL;
+		goto cleanup_put;
+	}
+
+	semaphore = data + (offset & ~PAGE_MASK);
+
+	remain = wait_event_interruptible_timeout(
+			ch->semaphore_wq,
+			*semaphore == payload || ch->has_timedout,
+			timeout);
+
+	if (remain == 0 && *semaphore != payload)
+		ret = -ETIMEDOUT;
+	else if (remain < 0)
+		ret = remain;
+
+	dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
+cleanup_put:
+	dma_buf_put(dmabuf);
+	return ret;
+}
+
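`gk20a_channel_wait_semaphore` maps only the page that holds the semaphore: `offset >> PAGE_SHIFT` picks the page for dma_buf_kmap, and `offset & ~PAGE_MASK` re-applies the in-page remainder to the mapped pointer. With the usual 4 KiB pages:

```c
#include <assert.h>

#define PAGE_SHIFT	12		/* assuming 4 KiB pages */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long offset = 0x3008;	/* semaphore at byte 0x3008 */

	/* page index handed to dma_buf_kmap() ... */
	assert((offset >> PAGE_SHIFT) == 3);
	/* ... and the remainder added back onto the kmapped page */
	assert((offset & ~PAGE_MASK) == 0x8);
	return 0;
}
```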
+static int gk20a_channel_wait(struct channel_gk20a *ch,
+			      struct nvhost_wait_args *args)
+{
+	struct device *d = dev_from_gk20a(ch->g);
+	struct dma_buf *dmabuf;
+	struct notification *notif;
+	struct timespec tv;
+	u64 jiffies;
+	ulong id;
+	u32 offset;
+	unsigned long timeout;
+	int remain, ret = 0;
+
+	gk20a_dbg_fn("");
+
+	if (ch->has_timedout)
+		return -ETIMEDOUT;
+
+	if (args->timeout == NVHOST_NO_TIMEOUT)
+		timeout = MAX_SCHEDULE_TIMEOUT;
+	else
+		timeout = (u32)msecs_to_jiffies(args->timeout);
+
+	switch (args->type) {
+	case NVHOST_WAIT_TYPE_NOTIFIER:
+		id = args->condition.notifier.nvmap_handle;
+		offset = args->condition.notifier.offset;
+
+		dmabuf = dma_buf_get(id);
+		if (IS_ERR(dmabuf)) {
+			gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
+				   id);
+			return -EINVAL;
+		}
+
+		notif = dma_buf_vmap(dmabuf);
+		if (!notif) {
+			gk20a_err(d, "failed to map notifier memory");
+			return -ENOMEM;
+		}
+
+		notif = (struct notification *)((uintptr_t)notif + offset);
+
+		/* user should set status pending before
+		 * calling this ioctl */
+		remain = wait_event_interruptible_timeout(
+				ch->notifier_wq,
+				notif->status == 0 || ch->has_timedout,
+				timeout);
+
+		if (remain == 0 && notif->status != 0) {
+			ret = -ETIMEDOUT;
+			goto notif_clean_up;
+		} else if (remain < 0) {
+			ret = -EINTR;
+			goto notif_clean_up;
+		}
+
+		/* TBD: fill in correct information */
+		jiffies = get_jiffies_64();
+		jiffies_to_timespec(jiffies, &tv);
+		notif->timestamp.nanoseconds[0] = tv.tv_nsec;
+		notif->timestamp.nanoseconds[1] = tv.tv_sec;
+		notif->info32 = 0xDEADBEEF; /* should be object name */
+		notif->info16 = ch->hw_chid; /* should be method offset */
+
+notif_clean_up:
+		dma_buf_vunmap(dmabuf, notif);
+		return ret;
+
+	case NVHOST_WAIT_TYPE_SEMAPHORE:
+		ret = gk20a_channel_wait_semaphore(ch,
+				args->condition.semaphore.nvmap_handle,
+				args->condition.semaphore.offset,
+				args->condition.semaphore.payload,
+				timeout);
+
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int gk20a_channel_set_priority(struct channel_gk20a *ch,
+		u32 priority)
+{
+	u32 timeslice_timeout;
+	/* set priority of graphics channel */
+	switch (priority) {
+	case NVHOST_PRIORITY_LOW:
+		/* 64 << 3 = 512us */
+		timeslice_timeout = 64;
+		break;
+	case NVHOST_PRIORITY_MEDIUM:
+		/* 128 << 3 = 1024us */
+		timeslice_timeout = 128;
+		break;
+	case NVHOST_PRIORITY_HIGH:
+		/* 255 << 3 = 2040us */
+		timeslice_timeout = 255;
+		break;
+	default:
+		pr_err("Unsupported priority");
+		return -EINVAL;
+	}
+	channel_gk20a_set_schedule_params(ch,
+			timeslice_timeout);
+	return 0;
+}
+
+static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
+			    struct nvhost_zcull_bind_args *args)
+{
+	struct gk20a *g = ch->g;
+	struct gr_gk20a *gr = &g->gr;
+
+	gk20a_dbg_fn("");
+
+	return gr_gk20a_bind_ctxsw_zcull(g, gr, ch,
+				args->gpu_va, args->mode);
+}
+
+/* in this context the "channel" is the host1x channel which
+ * maps to *all* gk20a channels */
+int gk20a_channel_suspend(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	u32 chid;
+	bool channels_in_use = false;
+	struct device *d = dev_from_gk20a(g);
+	int err;
+
+	gk20a_dbg_fn("");
+
+	/* idle the engine by submitting WFI on non-KEPLER_C channel */
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct channel_gk20a *c = &f->channel[chid];
+		if (c->in_use && c->obj_class != KEPLER_C) {
+			err = gk20a_channel_submit_wfi(c);
+			if (err) {
+				gk20a_err(d, "cannot idle channel %d\n",
+					chid);
+				return err;
+			}
+
+			c->sync->wait_cpu(c->sync, &c->last_submit_fence,
+					  500000);
+			break;
+		}
+	}
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		if (f->channel[chid].in_use) {
+
+			gk20a_dbg_info("suspend channel %d", chid);
+			/* disable channel */
+			gk20a_writel(g, ccsr_channel_r(chid),
+				gk20a_readl(g, ccsr_channel_r(chid)) |
+				ccsr_channel_enable_clr_true_f());
+			/* preempt the channel */
+			gk20a_fifo_preempt_channel(g, chid);
+
+			channels_in_use = true;
+		}
+	}
+
+	if (channels_in_use) {
+		gk20a_fifo_update_runlist(g, 0, ~0, false, true);
+
+		for (chid = 0; chid < f->num_channels; chid++) {
+			if (f->channel[chid].in_use)
+				channel_gk20a_unbind(&f->channel[chid]);
+		}
+	}
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+/* in this context the "channel" is the host1x channel which
+ * maps to *all* gk20a channels */
+int gk20a_channel_resume(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	u32 chid;
+	bool channels_in_use = false;
+
+	gk20a_dbg_fn("");
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		if (f->channel[chid].in_use) {
+			gk20a_dbg_info("resume channel %d", chid);
+			g->ops.fifo.bind_channel(&f->channel[chid]);
+			channels_in_use = true;
+		}
+	}
+
+	if (channels_in_use)
+		gk20a_fifo_update_runlist(g, 0, ~0, true, true);
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
+void gk20a_channel_semaphore_wakeup(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	u32 chid;
+
+	gk20a_dbg_fn("");
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct channel_gk20a *c = g->fifo.channel+chid;
+		if (c->in_use)
+			wake_up_interruptible_all(&c->semaphore_wq);
+	}
+}
+
+static int gk20a_ioctl_channel_submit_gpfifo(
+	struct channel_gk20a *ch,
+	struct nvhost_submit_gpfifo_args *args)
+{
+	void *gpfifo;
+	u32 size;
+	int ret = 0;
+
+	gk20a_dbg_fn("");
+
+	if (ch->has_timedout)
+		return -ETIMEDOUT;
+
+	size = args->num_entries * sizeof(struct nvhost_gpfifo);
+
+	gpfifo = kzalloc(size, GFP_KERNEL);
+	if (!gpfifo)
+		return -ENOMEM;
+
+	if (copy_from_user(gpfifo,
+			   (void __user *)(uintptr_t)args->gpfifo, size)) {
+		ret = -EINVAL;
+		goto clean_up;
+	}
+
+	ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
+					&args->fence, args->flags);
+
+clean_up:
+	kfree(gpfifo);
+	return ret;
+}
+
+void gk20a_init_fifo(struct gpu_ops *gops)
+{
+	gops->fifo.bind_channel = channel_gk20a_bind;
+}
+
+long gk20a_channel_ioctl(struct file *filp,
+	unsigned int cmd, unsigned long arg)
+{
+	struct channel_gk20a *ch = filp->private_data;
+	struct platform_device *dev = ch->g->dev;
+	u8 buf[NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE];
+	int err = 0;
+
+	if ((_IOC_TYPE(cmd) != NVHOST_IOCTL_MAGIC) ||
+		(_IOC_NR(cmd) == 0) ||
+		(_IOC_NR(cmd) > NVHOST_IOCTL_CHANNEL_LAST) ||
+		(_IOC_SIZE(cmd) > NVHOST_IOCTL_CHANNEL_MAX_ARG_SIZE))
+		return -EFAULT;
+
+	if (_IOC_DIR(cmd) & _IOC_WRITE) {
+		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
+			return -EFAULT;
+	}
+
+	switch (cmd) {
+	case NVHOST_IOCTL_CHANNEL_OPEN:
+	{
+		int fd;
+		struct file *file;
+		char *name;
+
+		err = get_unused_fd_flags(O_RDWR);
+		if (err < 0)
+			break;
+		fd = err;
+
+		name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
+				dev_name(&dev->dev), fd);
+		if (!name) {
+			err = -ENOMEM;
+			put_unused_fd(fd);
+			break;
+		}
+
+		file = anon_inode_getfile(name, filp->f_op, NULL, O_RDWR);
+		kfree(name);
+		if (IS_ERR(file)) {
+			err = PTR_ERR(file);
+			put_unused_fd(fd);
+			break;
+		}
+		fd_install(fd, file);
+
+		err = __gk20a_channel_open(ch->g, file);
+		if (err) {
+			put_unused_fd(fd);
+			fput(file);
+			break;
+		}
+
+		((struct nvhost_channel_open_args *)buf)->channel_fd = fd;
+		break;
+	}
+	case NVHOST_IOCTL_CHANNEL_SET_NVMAP_FD:
+		break;
+	case NVHOST_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
+		gk20a_channel_busy(dev);
+		err = gk20a_alloc_obj_ctx(ch,
+				(struct nvhost_alloc_obj_ctx_args *)buf);
+		gk20a_channel_idle(dev);
+		break;
+	case NVHOST_IOCTL_CHANNEL_FREE_OBJ_CTX:
+		gk20a_channel_busy(dev);
+		err = gk20a_free_obj_ctx(ch,
+				(struct nvhost_free_obj_ctx_args *)buf);
+		gk20a_channel_idle(dev);
+		break;
+	case NVHOST_IOCTL_CHANNEL_ALLOC_GPFIFO:
+		gk20a_channel_busy(dev);
+		err = gk20a_alloc_channel_gpfifo(ch,
+				(struct nvhost_alloc_gpfifo_args *)buf);
+		gk20a_channel_idle(dev);
+		break;
+	case NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO:
+		err = gk20a_ioctl_channel_submit_gpfifo(ch,
+				(struct nvhost_submit_gpfifo_args *)buf);
+		break;
+	case NVHOST_IOCTL_CHANNEL_WAIT:
+		gk20a_channel_busy(dev);
+		err = gk20a_channel_wait(ch,
+				(struct nvhost_wait_args *)buf);
+		gk20a_channel_idle(dev);
+		break;
+	case NVHOST_IOCTL_CHANNEL_ZCULL_BIND:
+		gk20a_channel_busy(dev);
+		err = gk20a_channel_zcull_bind(ch,
+				(struct nvhost_zcull_bind_args *)buf);
+		gk20a_channel_idle(dev);
+		break;
+	case NVHOST_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
+		gk20a_channel_busy(dev);
+		err = gk20a_init_error_notifier(ch,
+				(struct nvhost_set_error_notifier *)buf);
+		gk20a_channel_idle(dev);
+		break;
+#ifdef CONFIG_GK20A_CYCLE_STATS
+	case NVHOST_IOCTL_CHANNEL_CYCLE_STATS:
+		gk20a_channel_busy(dev);
+		err = gk20a_channel_cycle_stats(ch,
+				(struct nvhost_cycle_stats_args *)buf);
+		gk20a_channel_idle(dev);
+		break;
+#endif
+	case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT:
+	{
+		u32 timeout =
+			(u32)((struct nvhost_set_timeout_args *)buf)->timeout;
+		gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
+			   timeout, ch->hw_chid);
+		ch->timeout_ms_max = timeout;
+		break;
+	}
+	case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT_EX:
+	{
+		u32 timeout =
+			(u32)((struct nvhost_set_timeout_args *)buf)->timeout;
+		bool timeout_debug_dump = !((u32)
+			((struct nvhost_set_timeout_ex_args *)buf)->flags &
+			(1 << NVHOST_TIMEOUT_FLAG_DISABLE_DUMP));
+		gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
+			   timeout, ch->hw_chid);
+		ch->timeout_ms_max = timeout;
+		ch->timeout_debug_dump = timeout_debug_dump;
+		break;
+	}
+	case NVHOST_IOCTL_CHANNEL_GET_TIMEDOUT:
+		((struct nvhost_get_param_args *)buf)->value =
+			ch->has_timedout;
+		break;
+	case NVHOST_IOCTL_CHANNEL_SET_PRIORITY:
+		gk20a_channel_busy(dev);
+		gk20a_channel_set_priority(ch,
+			((struct nvhost_set_priority_args *)buf)->priority);
+		gk20a_channel_idle(dev);
+		break;
+	default:
+		dev_err(&dev->dev, "unrecognized ioctl cmd: 0x%x", cmd);
+		err = -ENOTTY;
+		break;
+	}
+
+	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
+		err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd));
+
+	return err;
+}
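For orientation, the ioctl surface above is typically driven from userspace in roughly this order. A hypothetical sketch (the ioctl names and args fields are the real ones used in this file, but the header location, struct contents, and overall flow are illustrative assumptions; error handling is elided, and an address space must already be bound to the channel fd):

```c
/* Hypothetical userspace sketch of the submit path handled above. */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvhost.h>	/* assumed location of the nvhost uapi */

static void submit_once(int ch_fd, struct nvhost_gpfifo *entries, int n)
{
	struct nvhost_alloc_obj_ctx_args obj = { 0 };	/* class etc. elided */
	struct nvhost_alloc_gpfifo_args gpfifo = { .num_entries = 128 };
	struct nvhost_submit_gpfifo_args submit = {
		.gpfifo = (uintptr_t)entries,
		.num_entries = n,
		.flags = NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET,
	};

	ioctl(ch_fd, NVHOST_IOCTL_CHANNEL_ALLOC_OBJ_CTX, &obj);
	ioctl(ch_fd, NVHOST_IOCTL_CHANNEL_ALLOC_GPFIFO, &gpfifo);
	ioctl(ch_fd, NVHOST_IOCTL_CHANNEL_SUBMIT_GPFIFO, &submit);
	/* submit.fence now holds the syncpoint id/value to wait on */
}
```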