tegra: dc: set EMC clock dynamically

If the screen is idle (no POST for some time), reduce the DC EMC clock according to the window sizes. If an external display is connected, the EMC clock will not be reduced. BUG 828306 Reviewed-on: http://git-master/r/37106 Change-Id: I88c76ef3afe5036f47d91f6540846fd767c399e4 Reviewed-on: http://git-master/r/38149 Reviewed-by: Xin Xie <xxie@nvidia.com> Tested-by: Xin Xie <xxie@nvidia.com> Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
author: Xin Xie <xxie@nvidia.com> 2011-06-23 14:22:11 -0700
committer: Niket Sirsi <nsirsi@nvidia.com> 2011-07-21 18:01:58 -0700
commit: 84f4fd2fa904ce20aad7732288736c32e9216851 (patch)
tree: 69172c667d533b871d3312a50ffc040bc953df14
parent: bd8d778195bdcdf53fb1e6b9783ab08bf3b42e40 (diff)
5 files changed, 276 insertions, 11 deletions
diff --git a/arch/arm/mach-tegra/include/mach/dc.h b/arch/arm/mach-tegra/include/mach/dc.h
index 25cf8021215a..9b848be43ead 100644
--- a/arch/arm/mach-tegra/include/mach/dc.h
+++ b/arch/arm/mach-tegra/include/mach/dc.h
@@ -360,6 +360,9 @@ u32 tegra_dc_get_syncpt_id(const struct tegra_dc *dc);
 u32 tegra_dc_incr_syncpt_max(struct tegra_dc *dc);
 void tegra_dc_incr_syncpt_min(struct tegra_dc *dc, u32 val);
 
+int tegra_dc_set_default_emc(struct tegra_dc *dc);
+int tegra_dc_set_dynamic_emc(struct tegra_dc_win *windows[], int n);
+
 /* tegra_dc_update_windows and tegra_dc_sync_windows do not support windows
  * with differenct dcs in one call
  */
diff --git a/drivers/video/tegra/dc/dc.c b/drivers/video/tegra/dc/dc.c
index 87b30c0d3177..50c51ad1e90d 100644
--- a/drivers/video/tegra/dc/dc.c
+++ b/drivers/video/tegra/dc/dc.c
@@ -31,6 +31,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/switch.h>
+#include <video/tegrafb.h>
 
 #include <mach/clk.h>
 #include <mach/dc.h>
@@ -46,6 +47,16 @@ static int no_vsync;
 
 module_param_named(no_vsync, no_vsync, int, S_IRUGO | S_IWUSR);
 
+/*
+ * Non-zero enables dynamic EMC scaling; when zero,
+ * tegra_dc_set_dynamic_emc() returns 0 without touching the rate.
+ */
+static int use_dynamic_emc = 1;
+
+module_param_named(use_dynamic_emc, use_dynamic_emc, int, S_IRUGO | S_IWUSR);
+
+/*
+ * Idle time, in milliseconds, passed to msecs_to_jiffies() as the delay
+ * before the reduced EMC rate is applied after a POST. Defaults to 2000ms
+ * for power saving purposes. A value of 0 makes
+ * NEED_UPDATE_EMC_ON_EVERY_FRAME true, so the rate is applied on every frame
+ * instead of being deferred.
+ */
+static int windows_idle_detection_time = 2000;
+
+module_param_named(windows_idle_detection_time, windows_idle_detection_time,
+		   int, S_IRUGO | S_IWUSR);
+
 struct tegra_dc *tegra_dcs[TEGRA_MAX_DC];
 
 DEFINE_MUTEX(tegra_dc_lock);
@@ -329,7 +340,7 @@ static void tegra_dc_dbg_add(struct tegra_dc *dc) {}
 #endif
 
 
-static int tegra_dc_add(struct tegra_dc *dc, int index)
+static int tegra_dc_set(struct tegra_dc *dc, int index)
 {
 	int ret = 0;
 
@@ -339,7 +350,7 @@ static int tegra_dc_add(struct tegra_dc *dc, int index)
 		goto out;
 	}
 
-	if (tegra_dcs[index] != NULL) {
+	if (dc != NULL && tegra_dcs[index] != NULL) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -352,6 +363,20 @@ out:
 	return ret;
 }
 
+/*
+ * Report whether more than one registered display controller is enabled.
+ * The tegra_dcs[] table is scanned while holding tegra_dc_lock.
+ *
+ * Returns 1 if at least two entries are non-NULL and enabled, 0 otherwise.
+ */
+static unsigned int tegra_dc_has_multiple_dc(void)
+{
+	unsigned int enabled_cnt = 0;
+	unsigned int i;
+
+	mutex_lock(&tegra_dc_lock);
+	for (i = 0; i < TEGRA_MAX_DC; i++) {
+		struct tegra_dc *cur = tegra_dcs[i];
+
+		if (cur != NULL && cur->enabled)
+			enabled_cnt++;
+	}
+	mutex_unlock(&tegra_dc_lock);
+
+	return enabled_cnt > 1;
+}
+
 struct tegra_dc *tegra_dc_get_dc(unsigned idx)
 {
 	if (idx < TEGRA_MAX_DC)
@@ -476,6 +501,186 @@ static void tegra_dc_set_scaling_filter(struct tegra_dc *dc)
 	}
 }
 
+/*
+ * Test whether the vertical output ranges of two windows intersect.
+ *
+ * Only out_y and out_h are compared; horizontal placement is not considered.
+ * Returns 0 when either window is not enabled, otherwise 1 if the ranges
+ * [out_y, out_y + out_h) of @a and @b overlap and 0 if they do not.
+ */
+static unsigned int tegra_dc_windows_is_overlapped(struct tegra_dc_win *a,
+						   struct tegra_dc_win *b)
+{
+	if (!(WIN_IS_ENABLED(a) && WIN_IS_ENABLED(b)))
+		return 0;
+
+	/* a starts no later than b and its span reaches past b's start */
+	if (a->out_y <= b->out_y && a->out_y + a->out_h > b->out_y)
+		return 1;
+
+	/* b starts no later than a and its span reaches past a's start */
+	return b->out_y <= a->out_y && b->out_y + b->out_h > a->out_y;
+}
+
+/*
+ * Find the peak combined bandwidth of the given windows.
+ *
+ * Windows that overlap vertically have their bandwidths added together for
+ * the overlapped area. The window at index 2 is assumed to always overlap
+ * the windows at index 0 and 1, so its bandwidth is always added; windows 0
+ * and 1 are summed only when they overlap each other, otherwise the larger
+ * of the two is used.
+ *
+ * @wins: array of @n window pointers
+ * @bw:   array of @n per-window bandwidths, parallel to @wins
+ * @n:    number of valid entries in @wins and @bw. Entries at index >= @n
+ *        are never read and count as zero bandwidth, because callers may
+ *        pass fewer than three windows (the framebuffer paths pass one).
+ *
+ * Returns the maximum bandwidth, in the same unit as @bw.
+ */
+static unsigned int tegra_dc_find_max_bandwidth(struct tegra_dc_win *wins[],
+						unsigned int bw[], int n)
+{
+	unsigned int bw0 = (n > 0) ? bw[0] : 0;
+	unsigned int bw1 = (n > 1) ? bw[1] : 0;
+	unsigned int bw2 = (n > 2) ? bw[2] : 0;
+
+	/* wins[1] only exists when at least two windows were passed in. */
+	if (n > 1 && tegra_dc_windows_is_overlapped(wins[0], wins[1]))
+		return bw0 + bw1 + bw2;
+
+	return max(bw0, bw1) + bw2;
+}
+
+/* 8 bits per byte (1 << 3) */
+#define BIT_TO_BYTE_SHIFT 3
+/*
+ * Assuming 50% (X >> 1) efficiency: i.e. if we calculate we need 70MBps, we
+ * will request 140MBps from EMC.
+ */
+#define MEM_EFFICIENCY_SHIFT 1
+/*
+ * Compute the EMC clock rate required by the given set of windows.
+ *
+ * @wins: array of @n window pointers; wins[0]->dc identifies the controller
+ * @n:    number of entries in @wins, at most TEGRA_FB_FLIP_N_WINDOWS
+ *
+ * When more than one display controller is enabled the board default rate
+ * is returned as is. Otherwise the peak bandwidth of every enabled window is
+ * computed, combined by tegra_dc_find_max_bandwidth(), doubled for the
+ * assumed memory efficiency and converted with EMC_BW_TO_FREQ().
+ */
+static unsigned long tegra_dc_get_emc_rate(struct tegra_dc_win *wins[], int n)
+{
+	int i;
+	/* Zeroed so disabled or absent windows contribute no bandwidth. */
+	unsigned int bw[TEGRA_FB_FLIP_N_WINDOWS] = { 0 };
+	struct tegra_dc_win *w;
+	struct tegra_dc *dc;
+	unsigned int peak_bw;
+	unsigned int ret;
+
+	dc = wins[0]->dc;
+
+	if (tegra_dc_has_multiple_dc())
+		return tegra_dc_get_default_emc_clk_rate(dc);
+
+	BUG_ON(n > ARRAY_SIZE(bw));
+	/*
+	 * Calculate peak EMC bandwidth for each enabled window =
+	 * pixel_clock * win_bpp * (use_v_filter ? 2 : 1) * H_scale_factor *
+	 * (windows_tiling ? TILED_WINDOWS_BW_MULTIPLIER : 1)
+	 *
+	 * note:
+	 * (*) We use 2 tap V filter, so need double BW if use V filter
+	 * (*) Tiling mode on T30 and DDR3 requires double BW
+	 *
+	 * The bound is tested before wins[i] and bw[i] are touched, so
+	 * neither array is accessed at index n.
+	 */
+	for (i = 0; i < n; i++) {
+		w = wins[i];
+		if (!WIN_IS_ENABLED(w))
+			continue;
+		bw[i] = dc->mode.pclk *
+			(tegra_dc_fmt_bpp(w->fmt) >> BIT_TO_BYTE_SHIFT) *
+			(WIN_USE_V_FILTER(w) ? 2 : 1) /
+			w->out_w * w->w *
+			(WIN_IS_TILED(w) ? TILED_WINDOWS_BW_MULTIPLIER : 1);
+	}
+
+	peak_bw = tegra_dc_find_max_bandwidth(wins, bw, n) <<
+		  MEM_EFFICIENCY_SHIFT;
+
+	ret = EMC_BW_TO_FREQ(peak_bw);
+
+	/*
+	 * If the calculated peak BW is bigger than board specified BW, then
+	 * either the above calculation is wrong, or board specified BW is
+	 * wrong.
+	 */
+	WARN_ON(ret > tegra_dc_get_default_emc_clk_rate(dc));
+
+	return ret;
+}
+#undef BIT_TO_BYTE_SHIFT
+#undef MEM_EFFICIENCY_SHIFT
+
+/*
+ * Apply dc->new_emc_clk_rate to the EMC clock when it differs from the
+ * cached dc->emc_clk_rate; do nothing when the two already match.
+ * The callers in this file invoke it with dc->lock held.
+ */
+static void tegra_dc_change_emc(struct tegra_dc *dc)
+{
+	if (dc->emc_clk_rate == dc->new_emc_clk_rate)
+		return;
+
+	dc->emc_clk_rate = dc->new_emc_clk_rate;
+	clk_set_rate(dc->emc_clk, dc->emc_clk_rate);
+}
+
+/*
+ * Delayed-work handler for reduce_emc_clk_work: applies the pending EMC
+ * rate under dc->lock, but only while the controller is enabled.
+ */
+static void tegra_dc_reduce_emc_worker(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct tegra_dc *dc = container_of(dwork, struct tegra_dc,
+					   reduce_emc_clk_work);
+
+	mutex_lock(&dc->lock);
+	if (dc->enabled)
+		tegra_dc_change_emc(dc);
+	mutex_unlock(&dc->lock);
+}
+
+/*
+ * Request an EMC rate matched to the windows of the current POST.
+ *
+ * @windows: array of @n window pointers; windows[0]->dc selects the
+ *           controller
+ * @n:       number of entries in @windows
+ *
+ * The rate computed by tegra_dc_get_emc_rate() is stored in
+ * dc->new_emc_clk_rate. It is applied immediately when
+ * NEED_UPDATE_EMC_ON_EVERY_FRAME is true; otherwise reduce_emc_clk_work is
+ * scheduled to apply it after windows_idle_detection_time milliseconds.
+ *
+ * Returns 0 on success or when the use_dynamic_emc parameter is zero,
+ * -EFAULT if the controller is not enabled.
+ */
+int tegra_dc_set_dynamic_emc(struct tegra_dc_win *windows[], int n)
+{
+	struct tegra_dc *dc;
+	int err = 0;
+
+	if (!use_dynamic_emc)
+		return 0;
+
+	dc = windows[0]->dc;
+
+	mutex_lock(&dc->lock);
+
+	if (!dc->enabled) {
+		err = -EFAULT;
+		goto out_unlock;
+	}
+
+	/* calculate the new rate based on this POST */
+	dc->new_emc_clk_rate = tegra_dc_get_emc_rate(windows, n);
+
+	/*
+	 * With a non-zero idle time the change is deferred to the delayed
+	 * work item; tegra_dc_set_default_emc() cancels that work when
+	 * another POST arrives before the idle time expires.
+	 */
+	if (!NEED_UPDATE_EMC_ON_EVERY_FRAME)
+		schedule_delayed_work(&dc->reduce_emc_clk_work,
+			msecs_to_jiffies(windows_idle_detection_time));
+	else
+		tegra_dc_change_emc(dc);
+
+out_unlock:
+	mutex_unlock(&dc->lock);
+
+	return err;
+}
+
+/*
+ * Restore the default EMC rate for a controller that is about to POST.
+ *
+ * Any pending reduce_emc_clk_work is cancelled first. When
+ * NEED_UPDATE_EMC_ON_EVERY_FRAME is true nothing else is done; otherwise
+ * the default rate from tegra_dc_get_default_emc_clk_rate() is applied
+ * under dc->lock.
+ *
+ * Returns 0 on success (including the every-frame case), -EFAULT if the
+ * controller is not enabled.
+ */
+int tegra_dc_set_default_emc(struct tegra_dc *dc)
+{
+	int err = -EFAULT;
+
+	/* a POST is coming: drop any scheduled rate reduction */
+	cancel_delayed_work_sync(&dc->reduce_emc_clk_work);
+
+	if (NEED_UPDATE_EMC_ON_EVERY_FRAME)
+		return 0;
+
+	mutex_lock(&dc->lock);
+	if (dc->enabled) {
+		dc->new_emc_clk_rate = tegra_dc_get_default_emc_clk_rate(dc);
+		tegra_dc_change_emc(dc);
+		err = 0;
+	}
+	mutex_unlock(&dc->lock);
+
+	return err;
+}
+
 /* does not support updating windows on multiple dcs in one call */
 int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n)
 {
@@ -526,7 +731,7 @@ int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n)
 		if (!no_vsync)
 			update_mask |= WIN_A_ACT_REQ << win->idx;
 
-		if (!(win->flags & TEGRA_WIN_FLAG_ENABLED)) {
+		if (!WIN_IS_ENABLED(win)) {
 			tegra_dc_writel(dc, 0, DC_WIN_WIN_OPTIONS);
 			continue;
 		}
@@ -590,7 +795,7 @@ int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n)
 		tegra_dc_writel(dc, h_offset, DC_WINBUF_ADDR_H_OFFSET);
 		tegra_dc_writel(dc, v_offset, DC_WINBUF_ADDR_V_OFFSET);
 
-		if (win->flags & TEGRA_WIN_FLAG_TILED)
+		if (WIN_IS_TILED(win))
 			tegra_dc_writel(dc,
 					DC_WIN_BUFFER_ADDR_MODE_TILE |
 					DC_WIN_BUFFER_ADDR_MODE_TILE_UV,
@@ -607,9 +812,9 @@ int tegra_dc_update_windows(struct tegra_dc_win *windows[], int n)
 		else if (tegra_dc_fmt_bpp(win->fmt) < 24)
 			val |= COLOR_EXPAND;
 
-		if (win->w != win->out_w)
+		if (WIN_USE_H_FILTER(win))
 			val |= H_FILTER_ENABLE;
-		if (win->h != win->out_h)
+		if (WIN_USE_V_FILTER(win))
 			val |= V_FILTER_ENABLE;
 
 		if (invert_h)
@@ -1655,6 +1860,8 @@ static int tegra_dc_probe(struct nvhost_device *ndev)
 
 	dc->clk = clk;
 	dc->emc_clk = emc_clk;
+	INIT_DELAYED_WORK(&dc->reduce_emc_clk_work, tegra_dc_reduce_emc_worker);
+
 	dc->base_res = base_res;
 	dc->base = base;
 	dc->irq = irq;
@@ -1665,8 +1872,8 @@ static int tegra_dc_probe(struct nvhost_device *ndev)
 	 * The emc is a shared clock, it will be set based on
 	 * the requirements for each user on the bus.
 	 */
-	emc_clk_rate = dc->pdata->emc_clk_rate;
-	clk_set_rate(emc_clk, emc_clk_rate ? emc_clk_rate : ULONG_MAX);
+	dc->emc_clk_rate = tegra_dc_get_default_emc_clk_rate(dc);
+	clk_set_rate(emc_clk, dc->emc_clk_rate);
 
 	if (dc->pdata->flags & TEGRA_DC_FLAG_ENABLED)
 		dc->enabled = true;
@@ -1691,7 +1898,7 @@ static int tegra_dc_probe(struct nvhost_device *ndev)
 	/* hack to ballence enable_irq calls in _tegra_dc_enable() */
 	disable_irq(dc->irq);
 
-	ret = tegra_dc_add(dc, ndev->id);
+	ret = tegra_dc_set(dc, ndev->id);
 	if (ret < 0) {
 		dev_err(&ndev->dev, "can't add dc\n");
 		goto err_free_irq;
@@ -1790,6 +1997,7 @@ static int tegra_dc_remove(struct nvhost_device *ndev)
 	if (dc->fb_mem)
 		release_resource(dc->base_res);
 	kfree(dc);
+	tegra_dc_set(NULL, ndev->id);
 	return 0;
 }
 
diff --git a/drivers/video/tegra/dc/dc_priv.h b/drivers/video/tegra/dc/dc_priv.h
index d31c4436dcba..7ee19f69c05b 100644
--- a/drivers/video/tegra/dc/dc_priv.h
+++ b/drivers/video/tegra/dc/dc_priv.h
@@ -25,6 +25,34 @@
 #include <linux/switch.h>
 #include "../host/dev.h"
 
+/* Window state helpers; each takes a pointer to a struct tegra_dc_win. */
+#define WIN_IS_TILED(win)	((win)->flags & TEGRA_WIN_FLAG_TILED)
+#define WIN_IS_ENABLED(win)	((win)->flags & TEGRA_WIN_FLAG_ENABLED)
+/* A filter is in use when the source and output sizes differ. */
+#define WIN_USE_V_FILTER(win)	((win)->h != (win)->out_h)
+#define WIN_USE_H_FILTER(win)	((win)->w != (win)->out_w)
+
+/*
+ * True when no idle delay is configured. Expands to a reference to
+ * windows_idle_detection_time, which must be in scope at the point of use
+ * (it is a static variable in dc.c).
+ */
+#define NEED_UPDATE_EMC_ON_EVERY_FRAME (windows_idle_detection_time == 0)
+
+/* DDR: 8 bytes transfer per clock */
+#define DDR_BW_TO_FREQ(bw) ((bw) / 8)
+
+/*
+ * Convert a bandwidth to an EMC frequency: the DDR frequency scaled by
+ * CONFIG_TEGRA_EMC_TO_DDR_CLOCK, or by 2 when that option is not defined.
+ */
+#if defined(CONFIG_TEGRA_EMC_TO_DDR_CLOCK)
+#define EMC_BW_TO_FREQ(bw) (DDR_BW_TO_FREQ(bw) * CONFIG_TEGRA_EMC_TO_DDR_CLOCK)
+#else
+#define EMC_BW_TO_FREQ(bw) (DDR_BW_TO_FREQ(bw) * 2)
+#endif
+
+/*
+ * If using T30/DDR3, the 2nd 16 bytes part of DDR3 atom is 2nd line and is
+ * discarded in tiling mode.
+ *
+ * For any other SoC the larger multiplier is used as a fallback so that the
+ * macro is always defined and the build only warns instead of failing at
+ * the point of use.
+ */
+#if defined(CONFIG_ARCH_TEGRA_2x_SOC)
+#define TILED_WINDOWS_BW_MULTIPLIER 1
+#elif defined(CONFIG_ARCH_TEGRA_3x_SOC)
+#define TILED_WINDOWS_BW_MULTIPLIER 2
+#else
+#warning "need to revisit memory tiling effects on DC"
+#define TILED_WINDOWS_BW_MULTIPLIER 2
+#endif
+
+
 struct tegra_dc;
 
 struct tegra_dc_blend {
@@ -51,8 +79,6 @@ struct tegra_dc_out_ops {
 };
 
 struct tegra_dc {
-	struct list_head		list;
-
 	struct nvhost_device		*ndev;
 	struct tegra_dc_platform_data	*pdata;
 
@@ -62,6 +88,8 @@ struct tegra_dc {
 
 	struct clk			*clk;
 	struct clk			*emc_clk;
+	int				emc_clk_rate;
+	int				new_emc_clk_rate;
 
 	bool				enabled;
 	bool				suspended;
@@ -91,6 +119,7 @@ struct tegra_dc {
 
 	unsigned long			underflow_mask;
 	struct work_struct		reset_work;
+	struct delayed_work		reduce_emc_clk_work;
 
 	struct switch_dev		modeset_switch;
 };
@@ -141,6 +170,12 @@ static inline void *tegra_dc_get_outdata(struct tegra_dc *dc)
 	return dc->out_data;
 }
 
+/*
+ * Return the default EMC clock rate for @dc: the platform data's
+ * emc_clk_rate when it is non-zero, otherwise ULONG_MAX.
+ */
+static inline unsigned long tegra_dc_get_default_emc_clk_rate(
+							struct tegra_dc *dc)
+{
+	if (dc->pdata->emc_clk_rate)
+		return dc->pdata->emc_clk_rate;
+
+	return ULONG_MAX;
+}
+
 void tegra_dc_setup_clk(struct tegra_dc *dc, struct clk *clk);
 
 extern struct tegra_dc_out_ops tegra_dc_rgb_ops;
diff --git a/drivers/video/tegra/dc/overlay.c b/drivers/video/tegra/dc/overlay.c
index 63a7f94204c7..e4f34f1d5101 100644
--- a/drivers/video/tegra/dc/overlay.c
+++ b/drivers/video/tegra/dc/overlay.c
@@ -305,9 +305,11 @@ static void tegra_overlay_flip_worker(struct work_struct *work)
 			dcwins[i] = tegra_dc_get_window(overlay->dc, i);
 
 		tegra_overlay_blend_reorder(&overlay->blend, dcwins);
+		tegra_dc_set_dynamic_emc(dcwins, DC_N_WINDOWS);
 		tegra_dc_update_windows(dcwins, DC_N_WINDOWS);
 		tegra_dc_sync_windows(dcwins, DC_N_WINDOWS);
 	} else {
+		tegra_dc_set_dynamic_emc(wins, nr_win);
 		tegra_dc_update_windows(wins, nr_win);
 		/* TODO: implement swapinterval here */
 		tegra_dc_sync_windows(wins, nr_win);
@@ -375,6 +377,12 @@ static int tegra_overlay_flip(struct tegra_overlay_info *overlay,
 
 	queue_work(overlay->flip_wq, &data->work);
 
+	/*
+	 * Before the queued flip_wq get scheduled, we set the EMC clock to the
+	 * default value in order to do FLIP without glitch.
+	 */
+	tegra_dc_set_default_emc(overlay->dc);
+
 	args->post_syncpt_val = syncpt_max;
 	args->post_syncpt_id = tegra_dc_get_syncpt_id(overlay->dc);
 	mutex_unlock(&tegra_flip_lock);
diff --git a/drivers/video/tegra/fb.c b/drivers/video/tegra/fb.c
index e0a39529494d..926fb29c8b54 100644
--- a/drivers/video/tegra/fb.c
+++ b/drivers/video/tegra/fb.c
@@ -305,6 +305,8 @@ static int tegra_fb_pan_display(struct fb_var_screeninfo *var,
 		tegra_fb->win->phys_addr = addr;
 		/* TODO: update virt_addr */
 
+		tegra_dc_set_default_emc(tegra_fb->win->dc);
+		tegra_dc_set_dynamic_emc(&tegra_fb->win, 1);
 		tegra_dc_update_windows(&tegra_fb->win, 1);
 		tegra_dc_sync_windows(&tegra_fb->win, 1);
 	}
@@ -515,6 +517,7 @@ static void tegra_fb_flip_worker(struct work_struct *work)
 #endif
 	}
 
+	tegra_dc_set_dynamic_emc(wins, nr_win);
 	tegra_dc_update_windows(wins, nr_win);
 	/* TODO: implement swapinterval here */
 	tegra_dc_sync_windows(wins, nr_win);
@@ -572,6 +575,12 @@ static int tegra_fb_flip(struct tegra_fb_info *tegra_fb,
 
 	queue_work(tegra_fb->flip_wq, &data->work);
 
+	/*
+	 * Before the queued flip_wq get scheduled, we set the EMC clock to the
+	 * default value in order to do FLIP without glitch.
+	 */
+	tegra_dc_set_default_emc(tegra_fb->win->dc);
+
 	args->post_syncpt_val = syncpt_max;
 	args->post_syncpt_id = tegra_dc_get_syncpt_id(tegra_fb->win->dc);
 
@@ -865,6 +874,8 @@ struct tegra_fb_info *tegra_fb_register(struct nvhost_device *ndev,
 	dev_info(&ndev->dev, "probed\n");
 
 	if (fb_data->flags & TEGRA_FB_FLIP_ON_PROBE) {
+		tegra_dc_set_default_emc(tegra_fb->win->dc);
+		tegra_dc_set_dynamic_emc(&tegra_fb->win, 1);
 		tegra_dc_update_windows(&tegra_fb->win, 1);
 		tegra_dc_sync_windows(&tegra_fb->win, 1);
 	}
author	Xin Xie <xxie@nvidia.com>	2011-06-23 14:22:11 -0700
committer	Niket Sirsi <nsirsi@nvidia.com>	2011-07-21 18:01:58 -0700
commit	84f4fd2fa904ce20aad7732288736c32e9216851 (patch)
tree	69172c667d533b871d3312a50ffc040bc953df14
parent	bd8d778195bdcdf53fb1e6b9783ab08bf3b42e40 (diff)