From fa1c8800792e9c2a9f3cc261247818016056775b Mon Sep 17 00:00:00 2001 From: Michael Frydrych Date: Tue, 2 Aug 2011 16:05:33 +0300 Subject: video: tegra: dc: window bandwidth calculations Determine which windows are overlapping, and apply bandwidth calculations for emc clock scaling and latency allowance appropriately. Bug 856234 Bug 850602 Original-Change-Id: If587c46e8929b3885b25125f054f5cc2d22b2b58 Reviewed-on: http://git-master/r/44772 Reviewed-by: Varun Colbert Tested-by: Varun Colbert Rebase-Id: R841ce1734a7afab1311d3367a72ba9755d6d539c --- drivers/video/tegra/dc/dc.c | 117 +++++++++++++++++++++++++++------------ drivers/video/tegra/dc/dc_priv.h | 1 + drivers/video/tegra/dc/overlay.c | 18 ++---- 3 files changed, 89 insertions(+), 47 deletions(-) (limited to 'drivers') diff --git a/drivers/video/tegra/dc/dc.c b/drivers/video/tegra/dc/dc.c index 5a7d1c9cd217..ad18d891b197 100644 --- a/drivers/video/tegra/dc/dc.c +++ b/drivers/video/tegra/dc/dc.c @@ -483,7 +483,7 @@ out: return ret; } -static unsigned int tegra_dc_has_multiple_dc(void) +unsigned int tegra_dc_has_multiple_dc(void) { unsigned int idx; unsigned int cnt = 0; @@ -643,15 +643,15 @@ static void tegra_dc_set_latency_allowance(struct tegra_dc *dc, BUG_ON(dc->ndev->id >= ARRAY_SIZE(vfilter_tab)); BUG_ON(w->idx >= ARRAY_SIZE(*la_id_tab)); + /* our bandwidth is in bytes/sec, but LA takes MBps. + * round up bandwidth to 1MBps */ + bw = w->new_bandwidth / 1000000 + 1; + /* tegra_dc_get_bandwidth() treats V filter windows as double * bandwidth, but LA has a seperate client for V filter */ if (w->idx == 1 && WIN_USE_V_FILTER(w)) bw /= 2; - /* our bandwidth is in bytes/sec, but LA takes MBps. - * round up bandwidth to 1MBps */ - bw = w->new_bandwidth / 1000000 + 1; - tegra_set_latency_allowance(la_id_tab[dc->ndev->id][w->idx], bw); /* if window B, also set the 1B client for the 2-tap V filter. */ @@ -666,31 +666,66 @@ static unsigned int tegra_dc_windows_is_overlapped(struct tegra_dc_win *a, { if (!WIN_IS_ENABLED(a) || !WIN_IS_ENABLED(b)) return 0; + + /* because memory access to load the fifo can overlap, only care + * if windows overlap vertically */ return ((a->out_y + a->out_h > b->out_y) && (a->out_y <= b->out_y)) || - ((b->out_y + b->out_h > a->out_y) && (b->out_y <= a->out_y)); + ((b->out_y + b->out_h > a->out_y) && (b->out_y <= a->out_y)); } -static unsigned int tegra_dc_find_max_bandwidth(struct tegra_dc_win *wins[], - int n) +static unsigned long tegra_dc_find_max_bandwidth(struct tegra_dc_win *wins[], + int n) { - /* We have n windows and knows their geometries and bandwidthes. If any - * of them overlapped vertically, the overlapped area bandwidth get - * combined. + unsigned i; + unsigned j; + unsigned overlap_count; + unsigned max_bw = 0; + + WARN_ONCE(n > 3, "Code assumes at most 3 windows, bandwidth is likely" + "inaccurate.\n"); + + /* If we had a large number of windows, we would compute adjacency + * graph representing 2 window overlaps, find all cliques in the graph, + * assign bandwidth to each clique, and then select the clique with + * maximum bandwidth. But because we have at most 3 windows, + * implementing proper Bron-Kerbosh algorithm would be an overkill, + * brute force will suffice. * - * This function will find the maximum bandwidth of overlapped area. - * If there is no windows overlapped, then return the maximum - * bandwidth of windows. + * Thus: find maximum bandwidth for either single or a pair of windows + * and count number of window pair overlaps. If there are three + * pairs, all 3 window overlap. */ - WARN_ONCE(n != 3, "Code assumes 3 windows, possibly reading junk.\n"); - /* We know win_2 is always overlapped with win_0 and win_1. */ - if (tegra_dc_windows_is_overlapped(wins[0], wins[1])) - return wins[0]->new_bandwidth + wins[1]->new_bandwidth + - wins[2]->new_bandwidth; - else - return max(wins[0]->new_bandwidth, wins[1]->new_bandwidth) + - wins[2]->new_bandwidth; + overlap_count = 0; + for (i = 0; i < n; i++) { + unsigned int bw1; + + if (wins[i] == NULL) + continue; + bw1 = wins[i]->new_bandwidth; + if (bw1 > max_bw) + /* Single window */ + max_bw = bw1; + + for (j = i + 1; j < n; j++) { + if (wins[j] == NULL) + continue; + if (tegra_dc_windows_is_overlapped(wins[i], wins[j])) { + unsigned int bw2 = wins[j]->new_bandwidth; + if (bw1 + bw2 > max_bw) + /* Window pair overlaps */ + max_bw = bw1 + bw2; + overlap_count++; + } + } + } + if (overlap_count == 3) + /* All three windows overlap */ + max_bw = wins[0]->new_bandwidth + wins[1]->new_bandwidth + + wins[2]->new_bandwidth; + + return max_bw; } /* @@ -708,17 +743,26 @@ static unsigned long tegra_dc_calc_win_bandwidth(struct tegra_dc *dc, { unsigned long ret; int tiled_windows_bw_multiplier; + unsigned long bpp; if (!WIN_IS_ENABLED(w)) return 0; + if (w->w == 0 || w->h == 0 || w->out_w == 0 || w->out_h == 0) + return 0; + tiled_windows_bw_multiplier = tegra_mc_get_tiled_memory_bandwidth_multiplier(); + /* all of tegra's YUV formats(420 and 422) fetch 2 bytes per pixel, + * but the size reported by tegra_dc_fmt_bpp for the planar version + * is of the luma plane's size only. */ + bpp = tegra_dc_is_yuv_planar(w->fmt) ? + 2 * tegra_dc_fmt_bpp(w->fmt) : tegra_dc_fmt_bpp(w->fmt); /* perform calculations with most significant bits of pixel clock * to prevent overflow of long. */ ret = (unsigned long)(dc->pixel_clk >> 16) * - (tegra_dc_fmt_bpp(w->fmt) / 8) * + bpp / 8 * (WIN_USE_V_FILTER(w) ? 2 : 1) * w->w / w->out_w * (WIN_IS_TILED(w) ? tiled_windows_bw_multiplier : 1); @@ -740,19 +784,15 @@ static unsigned long tegra_dc_calc_win_bandwidth(struct tegra_dc *dc, unsigned long tegra_dc_get_bandwidth(struct tegra_dc_win *windows[], int n) { int i; - struct tegra_dc *dc; - if (windows[0] == NULL) - return tegra_dc_get_default_emc_clk_rate(dc); - - dc = windows[0]->dc; BUG_ON(n > DC_N_WINDOWS); + /* emc rate and latency allowance both need to know per window * bandwidths */ for (i = 0; i < n; i++) { struct tegra_dc_win *w = windows[i]; if (w) - w->new_bandwidth = tegra_dc_calc_win_bandwidth(dc, w); + w->new_bandwidth = tegra_dc_calc_win_bandwidth(w->dc, w); } return tegra_dc_find_max_bandwidth(windows, n); @@ -788,13 +828,8 @@ static int tegra_dc_set_dynamic_emc(struct tegra_dc_win *windows[], int n) new_rate = tegra_dc_get_bandwidth(windows, n); new_rate = EMC_BW_TO_FREQ(new_rate); - WARN_ONCE(new_rate > tegra_dc_get_default_emc_clk_rate(dc), - "Calculated EMC bandwidth is %luHz, " - "maximum allowed EMC bandwidth is %luHz\n", - new_rate, tegra_dc_get_default_emc_clk_rate(dc)); - if (tegra_dc_has_multiple_dc()) - new_rate = tegra_dc_get_default_emc_clk_rate(dc); + new_rate = ULONG_MAX; dc->new_emc_clk_rate = new_rate; @@ -1274,6 +1309,10 @@ static int tegra_dc_program_mode(struct tegra_dc *dc, struct tegra_dc_mode *mode print_mode(dc, mode, __func__); + /* use default EMC rate when switching modes */ + dc->new_emc_clk_rate = tegra_dc_get_default_emc_clk_rate(dc); + tegra_dc_program_bandwidth(dc); + tegra_dc_writel(dc, 0x0, DC_DISP_DISP_TIMING_OPTIONS); tegra_dc_writel(dc, mode->h_ref_to_sync | (mode->v_ref_to_sync << 16), DC_DISP_REF_TO_SYNC); @@ -2002,6 +2041,8 @@ void tegra_dc_enable(struct tegra_dc *dc) static void _tegra_dc_controller_disable(struct tegra_dc *dc) { + unsigned i; + disable_irq(dc->irq); if (dc->out_ops && dc->out_ops->disable) @@ -2011,6 +2052,12 @@ static void _tegra_dc_controller_disable(struct tegra_dc *dc) clk_disable(dc->clk); tegra_dvfs_set_rate(dc->clk, 0); + for (i = 0; i < DC_N_WINDOWS; i++) { + struct tegra_dc_win *w = &dc->windows[i]; + w->bandwidth = 0; + w->new_bandwidth = 0; + } + if (dc->out && dc->out->disable) dc->out->disable(); diff --git a/drivers/video/tegra/dc/dc_priv.h b/drivers/video/tegra/dc/dc_priv.h index a1b434c15061..ebd339c489c2 100644 --- a/drivers/video/tegra/dc/dc_priv.h +++ b/drivers/video/tegra/dc/dc_priv.h @@ -193,6 +193,7 @@ void tegra_dc_stats_enable(struct tegra_dc *dc, bool enable); bool tegra_dc_stats_get(struct tegra_dc *dc); /* defined in dc.c, used by overlay.c */ +unsigned int tegra_dc_has_multiple_dc(void); unsigned long tegra_dc_get_bandwidth(struct tegra_dc_win *wins[], int n); /* defined in dc.c, used by dc_sysfs.c */ diff --git a/drivers/video/tegra/dc/overlay.c b/drivers/video/tegra/dc/overlay.c index 1db1cb469b92..7879100be8e1 100644 --- a/drivers/video/tegra/dc/overlay.c +++ b/drivers/video/tegra/dc/overlay.c @@ -405,7 +405,7 @@ surf_err: static void tegra_overlay_set_emc_freq(struct tegra_overlay_info *dev) { - unsigned long emc_freq = 0; + unsigned long new_rate; int i; struct tegra_dc_win *win; struct tegra_dc_win *wins[DC_N_WINDOWS]; @@ -415,19 +415,13 @@ static void tegra_overlay_set_emc_freq(struct tegra_overlay_info *dev) wins[i] = win; } - emc_freq = tegra_dc_get_bandwidth(wins, dev->dc->n_windows); + new_rate = tegra_dc_get_bandwidth(wins, dev->dc->n_windows); + new_rate = EMC_BW_TO_FREQ(new_rate); - if (emc_freq > tegra_dc_get_default_emc_clk_rate(dev->dc)) { - WARN_ONCE(emc_freq > tegra_dc_get_default_emc_clk_rate(dev->dc), - "Overlay: calculated EMC bandwidth is %luHz greater " - "than maximum allowed %luHz. Setting to max.\n", - emc_freq, - tegra_dc_get_default_emc_clk_rate(dev->dc)); + if (tegra_dc_has_multiple_dc()) + new_rate = ULONG_MAX; - emc_freq = tegra_dc_get_default_emc_clk_rate(dev->dc); - } - - clk_set_rate(dev->dc->emc_clk, emc_freq); + clk_set_rate(dev->dc->emc_clk, new_rate); } /* Overlay functions */ -- cgit v1.2.3