summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Frydrych <mfrydrych@nvidia.com>2011-08-02 16:05:33 +0300
committerDan Willemsen <dwillemsen@nvidia.com>2011-11-30 21:48:05 -0800
commitfa1c8800792e9c2a9f3cc261247818016056775b (patch)
tree759230f41e82a36cccbd7dbcdfeff773f5c7a093
parent3e8e2a67e2d2a0d77218d487082fdbffccbb548d (diff)
video: tegra: dc: window bandwidth calculations
Determine which windows are overlapping, and apply bandwidth calculations for emc clock scaling and latency allowance appropriately. Bug 856234 Bug 850602 Original-Change-Id: If587c46e8929b3885b25125f054f5cc2d22b2b58 Reviewed-on: http://git-master/r/44772 Reviewed-by: Varun Colbert <vcolbert@nvidia.com> Tested-by: Varun Colbert <vcolbert@nvidia.com> Rebase-Id: R841ce1734a7afab1311d3367a72ba9755d6d539c
-rw-r--r--drivers/video/tegra/dc/dc.c117
-rw-r--r--drivers/video/tegra/dc/dc_priv.h1
-rw-r--r--drivers/video/tegra/dc/overlay.c18
3 files changed, 89 insertions, 47 deletions
diff --git a/drivers/video/tegra/dc/dc.c b/drivers/video/tegra/dc/dc.c
index 5a7d1c9cd217..ad18d891b197 100644
--- a/drivers/video/tegra/dc/dc.c
+++ b/drivers/video/tegra/dc/dc.c
@@ -483,7 +483,7 @@ out:
return ret;
}
-static unsigned int tegra_dc_has_multiple_dc(void)
+unsigned int tegra_dc_has_multiple_dc(void)
{
unsigned int idx;
unsigned int cnt = 0;
@@ -643,15 +643,15 @@ static void tegra_dc_set_latency_allowance(struct tegra_dc *dc,
BUG_ON(dc->ndev->id >= ARRAY_SIZE(vfilter_tab));
BUG_ON(w->idx >= ARRAY_SIZE(*la_id_tab));
+ /* our bandwidth is in bytes/sec, but LA takes MBps.
+ * round up bandwidth to 1MBps */
+ bw = w->new_bandwidth / 1000000 + 1;
+
/* tegra_dc_get_bandwidth() treats V filter windows as double
* bandwidth, but LA has a seperate client for V filter */
if (w->idx == 1 && WIN_USE_V_FILTER(w))
bw /= 2;
- /* our bandwidth is in bytes/sec, but LA takes MBps.
- * round up bandwidth to 1MBps */
- bw = w->new_bandwidth / 1000000 + 1;
-
tegra_set_latency_allowance(la_id_tab[dc->ndev->id][w->idx], bw);
/* if window B, also set the 1B client for the 2-tap V filter. */
@@ -666,31 +666,66 @@ static unsigned int tegra_dc_windows_is_overlapped(struct tegra_dc_win *a,
{
if (!WIN_IS_ENABLED(a) || !WIN_IS_ENABLED(b))
return 0;
+
+ /* because memory access to load the fifo can overlap, only care
+ * if windows overlap vertically */
return ((a->out_y + a->out_h > b->out_y) && (a->out_y <= b->out_y)) ||
- ((b->out_y + b->out_h > a->out_y) && (b->out_y <= a->out_y));
+ ((b->out_y + b->out_h > a->out_y) && (b->out_y <= a->out_y));
}
-static unsigned int tegra_dc_find_max_bandwidth(struct tegra_dc_win *wins[],
- int n)
+static unsigned long tegra_dc_find_max_bandwidth(struct tegra_dc_win *wins[],
+ int n)
{
- /* We have n windows and knows their geometries and bandwidthes. If any
- * of them overlapped vertically, the overlapped area bandwidth get
- * combined.
+ unsigned i;
+ unsigned j;
+ unsigned overlap_count;
+ unsigned max_bw = 0;
+
+ WARN_ONCE(n > 3, "Code assumes at most 3 windows, bandwidth is likely"
+ "inaccurate.\n");
+
+ /* If we had a large number of windows, we would compute adjacency
+ * graph representing 2 window overlaps, find all cliques in the graph,
+ * assign bandwidth to each clique, and then select the clique with
+ * maximum bandwidth. But because we have at most 3 windows,
+ * implementing proper Bron-Kerbosh algorithm would be an overkill,
+ * brute force will suffice.
*
- * This function will find the maximum bandwidth of overlapped area.
- * If there is no windows overlapped, then return the maximum
- * bandwidth of windows.
+ * Thus: find maximum bandwidth for either single or a pair of windows
+ * and count number of window pair overlaps. If there are three
+ * pairs, all 3 window overlap.
*/
- WARN_ONCE(n != 3, "Code assumes 3 windows, possibly reading junk.\n");
- /* We know win_2 is always overlapped with win_0 and win_1. */
- if (tegra_dc_windows_is_overlapped(wins[0], wins[1]))
- return wins[0]->new_bandwidth + wins[1]->new_bandwidth +
- wins[2]->new_bandwidth;
- else
- return max(wins[0]->new_bandwidth, wins[1]->new_bandwidth) +
- wins[2]->new_bandwidth;
+ overlap_count = 0;
+ for (i = 0; i < n; i++) {
+ unsigned int bw1;
+
+ if (wins[i] == NULL)
+ continue;
+ bw1 = wins[i]->new_bandwidth;
+ if (bw1 > max_bw)
+ /* Single window */
+ max_bw = bw1;
+
+ for (j = i + 1; j < n; j++) {
+ if (wins[j] == NULL)
+ continue;
+ if (tegra_dc_windows_is_overlapped(wins[i], wins[j])) {
+ unsigned int bw2 = wins[j]->new_bandwidth;
+ if (bw1 + bw2 > max_bw)
+ /* Window pair overlaps */
+ max_bw = bw1 + bw2;
+ overlap_count++;
+ }
+ }
+ }
+ if (overlap_count == 3)
+ /* All three windows overlap */
+ max_bw = wins[0]->new_bandwidth + wins[1]->new_bandwidth +
+ wins[2]->new_bandwidth;
+
+ return max_bw;
}
/*
@@ -708,17 +743,26 @@ static unsigned long tegra_dc_calc_win_bandwidth(struct tegra_dc *dc,
{
unsigned long ret;
int tiled_windows_bw_multiplier;
+ unsigned long bpp;
if (!WIN_IS_ENABLED(w))
return 0;
+ if (w->w == 0 || w->h == 0 || w->out_w == 0 || w->out_h == 0)
+ return 0;
+
tiled_windows_bw_multiplier =
tegra_mc_get_tiled_memory_bandwidth_multiplier();
+ /* all of tegra's YUV formats(420 and 422) fetch 2 bytes per pixel,
+ * but the size reported by tegra_dc_fmt_bpp for the planar version
+ * is of the luma plane's size only. */
+ bpp = tegra_dc_is_yuv_planar(w->fmt) ?
+ 2 * tegra_dc_fmt_bpp(w->fmt) : tegra_dc_fmt_bpp(w->fmt);
/* perform calculations with most significant bits of pixel clock
* to prevent overflow of long. */
ret = (unsigned long)(dc->pixel_clk >> 16) *
- (tegra_dc_fmt_bpp(w->fmt) / 8) *
+ bpp / 8 *
(WIN_USE_V_FILTER(w) ? 2 : 1) * w->w / w->out_w *
(WIN_IS_TILED(w) ? tiled_windows_bw_multiplier : 1);
@@ -740,19 +784,15 @@ static unsigned long tegra_dc_calc_win_bandwidth(struct tegra_dc *dc,
unsigned long tegra_dc_get_bandwidth(struct tegra_dc_win *windows[], int n)
{
int i;
- struct tegra_dc *dc;
- if (windows[0] == NULL)
- return tegra_dc_get_default_emc_clk_rate(dc);
-
- dc = windows[0]->dc;
BUG_ON(n > DC_N_WINDOWS);
+
/* emc rate and latency allowance both need to know per window
* bandwidths */
for (i = 0; i < n; i++) {
struct tegra_dc_win *w = windows[i];
if (w)
- w->new_bandwidth = tegra_dc_calc_win_bandwidth(dc, w);
+ w->new_bandwidth = tegra_dc_calc_win_bandwidth(w->dc, w);
}
return tegra_dc_find_max_bandwidth(windows, n);
@@ -788,13 +828,8 @@ static int tegra_dc_set_dynamic_emc(struct tegra_dc_win *windows[], int n)
new_rate = tegra_dc_get_bandwidth(windows, n);
new_rate = EMC_BW_TO_FREQ(new_rate);
- WARN_ONCE(new_rate > tegra_dc_get_default_emc_clk_rate(dc),
- "Calculated EMC bandwidth is %luHz, "
- "maximum allowed EMC bandwidth is %luHz\n",
- new_rate, tegra_dc_get_default_emc_clk_rate(dc));
-
if (tegra_dc_has_multiple_dc())
- new_rate = tegra_dc_get_default_emc_clk_rate(dc);
+ new_rate = ULONG_MAX;
dc->new_emc_clk_rate = new_rate;
@@ -1274,6 +1309,10 @@ static int tegra_dc_program_mode(struct tegra_dc *dc, struct tegra_dc_mode *mode
print_mode(dc, mode, __func__);
+ /* use default EMC rate when switching modes */
+ dc->new_emc_clk_rate = tegra_dc_get_default_emc_clk_rate(dc);
+ tegra_dc_program_bandwidth(dc);
+
tegra_dc_writel(dc, 0x0, DC_DISP_DISP_TIMING_OPTIONS);
tegra_dc_writel(dc, mode->h_ref_to_sync | (mode->v_ref_to_sync << 16),
DC_DISP_REF_TO_SYNC);
@@ -2002,6 +2041,8 @@ void tegra_dc_enable(struct tegra_dc *dc)
static void _tegra_dc_controller_disable(struct tegra_dc *dc)
{
+ unsigned i;
+
disable_irq(dc->irq);
if (dc->out_ops && dc->out_ops->disable)
@@ -2011,6 +2052,12 @@ static void _tegra_dc_controller_disable(struct tegra_dc *dc)
clk_disable(dc->clk);
tegra_dvfs_set_rate(dc->clk, 0);
+ for (i = 0; i < DC_N_WINDOWS; i++) {
+ struct tegra_dc_win *w = &dc->windows[i];
+ w->bandwidth = 0;
+ w->new_bandwidth = 0;
+ }
+
if (dc->out && dc->out->disable)
dc->out->disable();
diff --git a/drivers/video/tegra/dc/dc_priv.h b/drivers/video/tegra/dc/dc_priv.h
index a1b434c15061..ebd339c489c2 100644
--- a/drivers/video/tegra/dc/dc_priv.h
+++ b/drivers/video/tegra/dc/dc_priv.h
@@ -193,6 +193,7 @@ void tegra_dc_stats_enable(struct tegra_dc *dc, bool enable);
bool tegra_dc_stats_get(struct tegra_dc *dc);
/* defined in dc.c, used by overlay.c */
+unsigned int tegra_dc_has_multiple_dc(void);
unsigned long tegra_dc_get_bandwidth(struct tegra_dc_win *wins[], int n);
/* defined in dc.c, used by dc_sysfs.c */
diff --git a/drivers/video/tegra/dc/overlay.c b/drivers/video/tegra/dc/overlay.c
index 1db1cb469b92..7879100be8e1 100644
--- a/drivers/video/tegra/dc/overlay.c
+++ b/drivers/video/tegra/dc/overlay.c
@@ -405,7 +405,7 @@ surf_err:
static void tegra_overlay_set_emc_freq(struct tegra_overlay_info *dev)
{
- unsigned long emc_freq = 0;
+ unsigned long new_rate;
int i;
struct tegra_dc_win *win;
struct tegra_dc_win *wins[DC_N_WINDOWS];
@@ -415,19 +415,13 @@ static void tegra_overlay_set_emc_freq(struct tegra_overlay_info *dev)
wins[i] = win;
}
- emc_freq = tegra_dc_get_bandwidth(wins, dev->dc->n_windows);
+ new_rate = tegra_dc_get_bandwidth(wins, dev->dc->n_windows);
+ new_rate = EMC_BW_TO_FREQ(new_rate);
- if (emc_freq > tegra_dc_get_default_emc_clk_rate(dev->dc)) {
- WARN_ONCE(emc_freq > tegra_dc_get_default_emc_clk_rate(dev->dc),
- "Overlay: calculated EMC bandwidth is %luHz greater "
- "than maximum allowed %luHz. Setting to max.\n",
- emc_freq,
- tegra_dc_get_default_emc_clk_rate(dev->dc));
+ if (tegra_dc_has_multiple_dc())
+ new_rate = ULONG_MAX;
- emc_freq = tegra_dc_get_default_emc_clk_rate(dev->dc);
- }
-
- clk_set_rate(dev->dc->emc_clk, emc_freq);
+ clk_set_rate(dev->dc->emc_clk, new_rate);
}
/* Overlay functions */