From 333dd36a0c2f725abc41f2420420ec89538b9a04 Mon Sep 17 00:00:00 2001 From: Jon Mayo Date: Fri, 1 Jun 2012 11:56:04 -0700 Subject: video: tegra: dc: new file for bandwidth calc Move bandwidth calculation logic into its own file. Change-Id: I57f58a6399805eede8783fea922c6f07dcbd54cb Signed-off-by: Jon Mayo Reviewed-on: http://git-master/r/106291 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit --- drivers/video/tegra/dc/bandwidth.c | 275 +++++++++++++++++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 drivers/video/tegra/dc/bandwidth.c (limited to 'drivers/video/tegra/dc/bandwidth.c') diff --git a/drivers/video/tegra/dc/bandwidth.c b/drivers/video/tegra/dc/bandwidth.c new file mode 100644 index 000000000000..a1da7ef0a995 --- /dev/null +++ b/drivers/video/tegra/dc/bandwidth.c @@ -0,0 +1,275 @@ +/* + * drivers/video/tegra/dc/bandwidth.c + * + * Copyright (C) 2010-2012 NVIDIA Corporation + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "dc_reg.h" +#include "dc_config.h" +#include "dc_priv.h" + +static int use_dynamic_emc = 1; + +module_param_named(use_dynamic_emc, use_dynamic_emc, int, S_IRUGO | S_IWUSR); + +/* uses the larger of w->bandwidth or w->new_bandwidth, and copies + * w->new_bandwidth into w->bandwidth */ +static void tegra_dc_set_latency_allowance(struct tegra_dc *dc, + struct tegra_dc_win *w) +{ + /* windows A, B, C for first and second display */ + static const enum tegra_la_id la_id_tab[2][3] = { + /* first display */ + { TEGRA_LA_DISPLAY_0A, TEGRA_LA_DISPLAY_0B, + TEGRA_LA_DISPLAY_0C }, + /* second display */ + { TEGRA_LA_DISPLAY_0AB, TEGRA_LA_DISPLAY_0BB, + TEGRA_LA_DISPLAY_0CB }, + }; + /* window B V-filter tap for first and second display. */ + static const enum tegra_la_id vfilter_tab[2] = { + TEGRA_LA_DISPLAY_1B, TEGRA_LA_DISPLAY_1BB, + }; + unsigned long bw; + + BUG_ON(dc->ndev->id >= ARRAY_SIZE(la_id_tab)); + BUG_ON(dc->ndev->id >= ARRAY_SIZE(vfilter_tab)); + BUG_ON(w->idx >= ARRAY_SIZE(*la_id_tab)); + + bw = max(w->bandwidth, w->new_bandwidth); + + /* tegra_dc_get_bandwidth() treats V filter windows as double + * bandwidth, but LA has a seperate client for V filter */ + if (w->idx == 1 && win_use_v_filter(dc, w)) + bw /= 2; + + /* our bandwidth is in kbytes/sec, but LA takes MBps. + * round up bandwidth to next 1MBps */ + bw = bw / 1000 + 1; + +#ifdef CONFIG_TEGRA_SILICON_PLATFORM + tegra_set_latency_allowance(la_id_tab[dc->ndev->id][w->idx], bw); + /* if window B, also set the 1B client for the 2-tap V filter. */ + if (w->idx == 1) + tegra_set_latency_allowance(vfilter_tab[dc->ndev->id], bw); +#endif + + w->bandwidth = w->new_bandwidth; +} + +static unsigned int tegra_dc_windows_is_overlapped(struct tegra_dc_win *a, + struct tegra_dc_win *b) +{ + if (!WIN_IS_ENABLED(a) || !WIN_IS_ENABLED(b)) + return 0; + + /* because memory access to load the fifo can overlap, only care + * if windows overlap vertically */ + return ((a->out_y + a->out_h > b->out_y) && (a->out_y <= b->out_y)) || + ((b->out_y + b->out_h > a->out_y) && (b->out_y <= a->out_y)); +} + +static unsigned long tegra_dc_find_max_bandwidth(struct tegra_dc_win *wins[], + int n) +{ + unsigned i; + unsigned j; + unsigned overlap_count; + unsigned max_bw = 0; + + WARN_ONCE(n > 3, "Code assumes at most 3 windows, bandwidth is likely" + "inaccurate.\n"); + + /* If we had a large number of windows, we would compute adjacency + * graph representing 2 window overlaps, find all cliques in the graph, + * assign bandwidth to each clique, and then select the clique with + * maximum bandwidth. But because we have at most 3 windows, + * implementing proper Bron-Kerbosh algorithm would be an overkill, + * brute force will suffice. + * + * Thus: find maximum bandwidth for either single or a pair of windows + * and count number of window pair overlaps. If there are three + * pairs, all 3 window overlap. + */ + + overlap_count = 0; + for (i = 0; i < n; i++) { + unsigned int bw1; + + if (wins[i] == NULL) + continue; + bw1 = wins[i]->new_bandwidth; + if (bw1 > max_bw) + /* Single window */ + max_bw = bw1; + + for (j = i + 1; j < n; j++) { + if (wins[j] == NULL) + continue; + if (tegra_dc_windows_is_overlapped(wins[i], wins[j])) { + unsigned int bw2 = wins[j]->new_bandwidth; + if (bw1 + bw2 > max_bw) + /* Window pair overlaps */ + max_bw = bw1 + bw2; + overlap_count++; + } + } + } + + if (overlap_count == 3) + /* All three windows overlap */ + max_bw = wins[0]->new_bandwidth + wins[1]->new_bandwidth + + wins[2]->new_bandwidth; + + return max_bw; +} + +/* + * Calculate peak EMC bandwidth for each enabled window = + * pixel_clock * win_bpp * (use_v_filter ? 2 : 1)) * H_scale_factor * + * (windows_tiling ? 2 : 1) + * + * note: + * (*) We use 2 tap V filter, so need double BW if use V filter + * (*) Tiling mode on T30 and DDR3 requires double BW + * + * return: + * bandwidth in kBps + */ +static unsigned long tegra_dc_calc_win_bandwidth(struct tegra_dc *dc, + struct tegra_dc_win *w) +{ + unsigned long ret; + int tiled_windows_bw_multiplier; + unsigned long bpp; + + if (!WIN_IS_ENABLED(w)) + return 0; + + if (dfixed_trunc(w->w) == 0 || dfixed_trunc(w->h) == 0 || + w->out_w == 0 || w->out_h == 0) + return 0; + + tiled_windows_bw_multiplier = + tegra_mc_get_tiled_memory_bandwidth_multiplier(); + + /* all of tegra's YUV formats(420 and 422) fetch 2 bytes per pixel, + * but the size reported by tegra_dc_fmt_bpp for the planar version + * is of the luma plane's size only. */ + bpp = tegra_dc_is_yuv_planar(w->fmt) ? + 2 * tegra_dc_fmt_bpp(w->fmt) : tegra_dc_fmt_bpp(w->fmt); + ret = dc->mode.pclk / 1000UL * bpp / 8 * ( + win_use_v_filter(dc, w) ? 2 : 1) * + dfixed_trunc(w->w) / w->out_w * (WIN_IS_TILED(w) ? + tiled_windows_bw_multiplier : 1); + + return ret; +} + +static unsigned long tegra_dc_get_bandwidth( + struct tegra_dc_win *windows[], int n) +{ + int i; + + BUG_ON(n > DC_N_WINDOWS); + + /* emc rate and latency allowance both need to know per window + * bandwidths */ + for (i = 0; i < n; i++) { + struct tegra_dc_win *w = windows[i]; + + if (w) + w->new_bandwidth = + tegra_dc_calc_win_bandwidth(w->dc, w); + } + + return tegra_dc_find_max_bandwidth(windows, n); +} + +/* to save power, call when display memory clients would be idle */ +void tegra_dc_clear_bandwidth(struct tegra_dc *dc) +{ + trace_printk("%s:%s rate=%d\n", dc->ndev->name, __func__, + dc->emc_clk_rate); + if (tegra_is_clk_enabled(dc->emc_clk)) + clk_disable(dc->emc_clk); + dc->emc_clk_rate = 0; +} + +/* use the larger of dc->emc_clk_rate or dc->new_emc_clk_rate, and copies + * dc->new_emc_clk_rate into dc->emc_clk_rate. + * calling this function both before and after a flip is sufficient to select + * the best possible frequency and latency allowance. + */ +void tegra_dc_program_bandwidth(struct tegra_dc *dc) +{ + unsigned i; + + if (dc->emc_clk_rate != dc->new_emc_clk_rate) { + /* going from 0 to non-zero */ + if (!dc->emc_clk_rate && !tegra_is_clk_enabled(dc->emc_clk)) + clk_enable(dc->emc_clk); + + clk_set_rate(dc->emc_clk, + max(dc->emc_clk_rate, dc->new_emc_clk_rate)); + dc->emc_clk_rate = dc->new_emc_clk_rate; + + if (!dc->new_emc_clk_rate) /* going from non-zero to 0 */ + clk_disable(dc->emc_clk); + } + + for (i = 0; i < DC_N_WINDOWS; i++) { + struct tegra_dc_win *w = &dc->windows[i]; + + if (w->bandwidth != w->new_bandwidth && w->new_bandwidth != 0) + tegra_dc_set_latency_allowance(dc, w); + trace_printk("%s:win%u bandwidth=%d\n", dc->ndev->name, w->idx, + w->bandwidth); + } +} + +int tegra_dc_set_dynamic_emc(struct tegra_dc_win *windows[], int n) +{ + unsigned long new_rate; + struct tegra_dc *dc; + + if (!use_dynamic_emc) + return 0; + + dc = windows[0]->dc; + + /* calculate the new rate based on this POST */ + new_rate = tegra_dc_get_bandwidth(windows, n); + if (WARN_ONCE(new_rate > (ULONG_MAX / 1000), "bandwidth maxed out\n")) + new_rate = ULONG_MAX; + else + new_rate = EMC_BW_TO_FREQ(new_rate * 1000); + + if (tegra_dc_has_multiple_dc()) + new_rate = ULONG_MAX; + + trace_printk("%s:new_emc_clk_rate=%ld\n", dc->ndev->name, new_rate); + dc->new_emc_clk_rate = new_rate; + + return 0; +} -- cgit v1.2.3