diff options
Diffstat (limited to 'drivers/video/tegra/host')
-rw-r--r-- | drivers/video/tegra/host/bus.c | 2 | ||||
-rw-r--r-- | drivers/video/tegra/host/bus_client.c | 3 | ||||
-rw-r--r-- | drivers/video/tegra/host/chip_support.h | 1 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/gr3d.c | 4 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/gr3d.h | 3 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/gr3d_t20.c | 14 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/gr3d_t30.c | 28 | ||||
-rw-r--r-- | drivers/video/tegra/host/gr3d/scale3d.c | 602 | ||||
-rw-r--r-- | drivers/video/tegra/host/host1x/host1x.c | 15 | ||||
-rw-r--r-- | drivers/video/tegra/host/host1x/host1x_cdma.c | 11 | ||||
-rw-r--r-- | drivers/video/tegra/host/host1x/host1x_channel.c | 8 | ||||
-rw-r--r-- | drivers/video/tegra/host/host1x/host1x_intr.c | 13 | ||||
-rw-r--r-- | drivers/video/tegra/host/mpe/mpe.c | 18 | ||||
-rw-r--r-- | drivers/video/tegra/host/nvhost_acm.c | 23 | ||||
-rw-r--r-- | drivers/video/tegra/host/nvhost_intr.c | 12 | ||||
-rw-r--r-- | drivers/video/tegra/host/nvhost_intr.h | 2 | ||||
-rw-r--r-- | drivers/video/tegra/host/nvhost_job.c | 31 | ||||
-rw-r--r-- | drivers/video/tegra/host/nvhost_syncpt.c | 6 | ||||
-rw-r--r-- | drivers/video/tegra/host/t30/t30.c | 2 |
19 files changed, 583 insertions, 215 deletions
diff --git a/drivers/video/tegra/host/bus.c b/drivers/video/tegra/host/bus.c index 758a5ca4ad94..f22dac288051 100644 --- a/drivers/video/tegra/host/bus.c +++ b/drivers/video/tegra/host/bus.c @@ -96,7 +96,7 @@ static int nvhost_bus_match(struct device *_dev, struct device_driver *drv) if (ndrv->id_table) return nvhost_bus_match_id(dev, ndrv->id_table) != NULL; else /* driver does not support id_table */ - return !strncmp(dev->name, drv->name, strlen(drv->name)); + return !strcmp(dev->name, drv->name); } static int nvhost_drv_probe(struct device *_dev) diff --git a/drivers/video/tegra/host/bus_client.c b/drivers/video/tegra/host/bus_client.c index 0137793b39ee..aaa038221971 100644 --- a/drivers/video/tegra/host/bus_client.c +++ b/drivers/video/tegra/host/bus_client.c @@ -159,7 +159,8 @@ static int nvhost_channelopen(struct inode *inode, struct file *filp) } filp->private_data = priv; priv->ch = ch; - nvhost_module_add_client(ch->dev, priv); + if(nvhost_module_add_client(ch->dev, priv)) + goto fail; if (ch->ctxhandler && ch->ctxhandler->alloc) { priv->hwctx = ch->ctxhandler->alloc(ch->ctxhandler, ch); diff --git a/drivers/video/tegra/host/chip_support.h b/drivers/video/tegra/host/chip_support.h index f5d2811f143f..412ce8b65466 100644 --- a/drivers/video/tegra/host/chip_support.h +++ b/drivers/video/tegra/host/chip_support.h @@ -125,6 +125,7 @@ struct nvhost_intr_ops { void (*set_syncpt_threshold)( struct nvhost_intr *, u32 id, u32 thresh); void (*enable_syncpt_intr)(struct nvhost_intr *, u32 id); + void (*disable_syncpt_intr)(struct nvhost_intr *, u32 id); void (*disable_all_syncpt_intrs)(struct nvhost_intr *); int (*request_host_general_irq)(struct nvhost_intr *); void (*free_host_general_irq)(struct nvhost_intr *); diff --git a/drivers/video/tegra/host/gr3d/gr3d.c b/drivers/video/tegra/host/gr3d/gr3d.c index 715468131d9e..775c77b0e88d 100644 --- a/drivers/video/tegra/host/gr3d/gr3d.c +++ b/drivers/video/tegra/host/gr3d/gr3d.c @@ -80,8 +80,10 @@ struct host1x_hwctx *nvhost_3dctx_alloc_common(struct host1x_hwctx_handler *p, ctx->restore = mem_op().alloc(memmgr, p->restore_size * 4, 32, map_restore ? mem_mgr_flag_write_combine : mem_mgr_flag_uncacheable); - if (IS_ERR_OR_NULL(ctx->restore)) + if (IS_ERR_OR_NULL(ctx->restore)) { + ctx->restore = NULL; goto fail; + } if (map_restore) { ctx->restore_virt = mem_op().mmap(ctx->restore); diff --git a/drivers/video/tegra/host/gr3d/gr3d.h b/drivers/video/tegra/host/gr3d/gr3d.h index 3855b237b702..61f708cea95c 100644 --- a/drivers/video/tegra/host/gr3d/gr3d.h +++ b/drivers/video/tegra/host/gr3d/gr3d.h @@ -29,6 +29,9 @@ #define AR3D_PSEQ_QUAD_ID 0x545 #define AR3D_DW_MEMORY_OUTPUT_ADDRESS 0x904 #define AR3D_DW_MEMORY_OUTPUT_DATA 0x905 +#define AR3D_FDC_CONTROL_0 0xa00 +#define AR3D_FDC_CONTROL_0_RESET_VAL 0xe00 +#define AR3D_FDC_CONTROL_0_INVALIDATE 1 #define AR3D_GSHIM_WRITE_MASK 0xb00 #define AR3D_GSHIM_READ_SELECT 0xb01 #define AR3D_GLOBAL_MEMORY_OUTPUT_READS 0xe40 diff --git a/drivers/video/tegra/host/gr3d/gr3d_t20.c b/drivers/video/tegra/host/gr3d/gr3d_t20.c index b6e3896fe50c..694b00527790 100644 --- a/drivers/video/tegra/host/gr3d/gr3d_t20.c +++ b/drivers/video/tegra/host/gr3d/gr3d_t20.c @@ -144,7 +144,7 @@ static void save_push_v0(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma) p->save_phys); } -static void __init save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr) +static void save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr) { /* 3d: when done, increment syncpt to base+1 */ ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0); @@ -162,7 +162,7 @@ static void __init save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr) h->syncpt); /* incr 2 */ } -static void __init save_direct_v0(u32 *ptr, u32 start_reg, u32 count) +static void save_direct_v0(u32 *ptr, u32 start_reg, u32 count) { ptr[0] = nvhost_opcode_nonincr(host1x_uclass_indoff_r(), 1); ptr[1] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D, @@ -170,7 +170,7 @@ static void __init save_direct_v0(u32 *ptr, u32 start_reg, u32 count) ptr[2] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count); } -static void __init save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset, +static void save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset, u32 data_reg, u32 count) { ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, @@ -183,7 +183,7 @@ static void __init save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset, ptr[4] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count); } -static void __init save_end_v0(struct host1x_hwctx_handler *h, u32 *ptr) +static void save_end_v0(struct host1x_hwctx_handler *h, u32 *ptr) { /* Wait for context read service to finish (cpu incr 3) */ ptr[0] = nvhost_opcode_nonincr(host1x_uclass_wait_syncpt_base_r(), 1); @@ -226,7 +226,7 @@ static u32 *save_regs_v0(u32 *ptr, unsigned int *pending, /*** save ***/ -static void __init setup_save_regs(struct save_info *info, +static void setup_save_regs(struct save_info *info, const struct hwctx_reginfo *regs, unsigned int nr_regs) { @@ -284,7 +284,7 @@ static void __init setup_save_regs(struct save_info *info, info->restore_count = restore_count; } -static void __init setup_save(struct host1x_hwctx_handler *h, u32 *ptr) +static void setup_save(struct host1x_hwctx_handler *h, u32 *ptr) { struct save_info info = { ptr, @@ -371,7 +371,7 @@ struct nvhost_hwctx_handler *nvhost_gr3d_t20_ctxhandler_init( p->save_buf = mem_op().alloc(memmgr, p->save_size * sizeof(u32), 32, mem_mgr_flag_write_combine); - if (IS_ERR(p->save_buf)) { + if (IS_ERR_OR_NULL(p->save_buf)) { p->save_buf = NULL; return NULL; } diff --git a/drivers/video/tegra/host/gr3d/gr3d_t30.c b/drivers/video/tegra/host/gr3d/gr3d_t30.c index c35fea2f3ac2..664708c7fc80 100644 --- a/drivers/video/tegra/host/gr3d/gr3d_t30.c +++ b/drivers/video/tegra/host/gr3d/gr3d_t30.c @@ -125,6 +125,16 @@ static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma) nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0), NVHOST_OPCODE_NOOP); + /* invalidate the FDC to prevent cache-coherency issues across GPUs + note that we assume FDC_CONTROL_0 is left in the reset state by all + contexts. the invalidate bit will clear itself, so the register + should be unchanged after this */ + nvhost_cdma_push(cdma, + nvhost_opcode_imm(AR3D_FDC_CONTROL_0, + AR3D_FDC_CONTROL_0_RESET_VAL + | AR3D_FDC_CONTROL_0_INVALIDATE), + NVHOST_OPCODE_NOOP); + /* set register set 0 and 1 register read memory output addresses, and send their reads to memory */ @@ -132,7 +142,7 @@ static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma) nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK, 2), nvhost_opcode_imm(AR3D_GLOBAL_MEMORY_OUTPUT_READS, 1)); nvhost_cdma_push(cdma, - nvhost_opcode_nonincr(0x904, 1), + nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_ADDRESS, 1), ctx->restore_phys + restore_set1_offset * 4); nvhost_cdma_push(cdma, @@ -150,7 +160,7 @@ static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma) p->save_phys); } -static void __init save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr) +static void save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr) { ptr[0] = nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_DATA, RESTORE_BEGIN_SIZE); @@ -158,7 +168,7 @@ static void __init save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr) ptr += RESTORE_BEGIN_SIZE; } -static void __init save_direct_v1(u32 *ptr, u32 start_reg, u32 count) +static void save_direct_v1(u32 *ptr, u32 start_reg, u32 count) { ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, AR3D_DW_MEMORY_OUTPUT_DATA, 1); @@ -172,7 +182,7 @@ static void __init save_direct_v1(u32 *ptr, u32 start_reg, u32 count) ptr[3] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count); } -static void __init save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset, +static void save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset, u32 data_reg, u32 count) { ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0); @@ -189,7 +199,7 @@ static void __init save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset, ptr[5] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count); } -static void __init save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr) +static void save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr) { /* write end of restore buffer */ ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, @@ -224,7 +234,7 @@ static void __init save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr) -static void __init setup_save_regs(struct save_info *info, +static void setup_save_regs(struct save_info *info, const struct hwctx_reginfo *regs, unsigned int nr_regs) { @@ -282,7 +292,7 @@ static void __init setup_save_regs(struct save_info *info, info->restore_count = restore_count; } -static void __init switch_gpu(struct save_info *info, +static void switch_gpu(struct save_info *info, unsigned int save_src_set, u32 save_dest_sets, u32 restore_dest_sets) @@ -303,7 +313,7 @@ static void __init switch_gpu(struct save_info *info, info->restore_count += 1; } -static void __init setup_save(struct host1x_hwctx_handler *p, u32 *ptr) +static void setup_save(struct host1x_hwctx_handler *p, u32 *ptr) { struct save_info info = { ptr, @@ -399,7 +409,7 @@ struct nvhost_hwctx_handler *nvhost_gr3d_t30_ctxhandler_init( p->save_buf = mem_op().alloc(memmgr, p->save_size * 4, 32, mem_mgr_flag_write_combine); - if (IS_ERR(p->save_buf)) { + if (IS_ERR_OR_NULL(p->save_buf)) { p->save_buf = NULL; return NULL; } diff --git a/drivers/video/tegra/host/gr3d/scale3d.c b/drivers/video/tegra/host/gr3d/scale3d.c index 5922b55a836a..49147975a9e4 100644 --- a/drivers/video/tegra/host/gr3d/scale3d.c +++ b/drivers/video/tegra/host/gr3d/scale3d.c @@ -1,9 +1,9 @@ /* - * drivers/video/tegra/host/t20/scale3d.c + * drivers/video/tegra/host/gr3d/scale3d.c * * Tegra Graphics Host 3D clock scaling * - * Copyright (c) 2010-2012, NVIDIA Corporation. + * Copyright (c) 2010-2012, NVIDIA Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -23,12 +23,11 @@ * * module3d_notify_busy() is called upon submit, module3d_notify_idle() is * called when all outstanding submits are completed. Idle times are measured - * over a fixed time period (scale3d.p_period). If the 3d module idle time - * percentage goes over the limit (set in scale3d.p_idle_max), 3d clocks are - * scaled down. If the percentage goes under the minimum limit (set in - * scale3d.p_idle_min), 3d clocks are scaled up. An additional test is made - * over the time frame given in scale3d.p_fast_response for clocking up - * quickly in response to load peaks. + * over a fixed time period (scale3d.p_estimation_window). If the 3d module + * idle time percentage goes over the limit (set in scale3d.p_idle_max), 3d + * clocks are scaled down. If the percentage goes under the minimum limit (set + * in scale3d.p_idle_min), 3d clocks are scaled up. An additional test is made + * for clocking up quickly in response to load peaks. * * 3d.emc clock is scaled proportionately to 3d clock, with a quadratic- * bezier-like factor added to pull 3d.emc rate a bit lower. @@ -37,10 +36,31 @@ #include <linux/debugfs.h> #include <linux/types.h> #include <linux/clk.h> +#include <linux/slab.h> #include <mach/clk.h> #include <mach/hardware.h> #include "scale3d.h" #include "dev.h" +#include <media/tegra_camera.h> + +#define GR3D_PRINT_STATS BIT(1) +#define GR3D_PRINT_BUSY BIT(2) +#define GR3D_PRINT_IDLE BIT(3) +#define GR3D_PRINT_HINT BIT(4) +#define GR3D_PRINT_TARGET BIT(5) + +/* time frame for load and hint tracking - when events come in at a larger + * interval, this probably indicates the current estimates are stale + */ +#define GR3D_TIMEFRAME 1000000 /* 1 sec */ + +/* the number of frames to use in the running average of load estimates and + * throughput hints. Choosing 6 frames targets a window of about 100 msec. + * Large flucutuations in frame times require a window that's large enough to + * prevent spiky scaling behavior, which in turn exacerbates frame rate + * instability. + */ +#define GR3D_FRAME_SPAN 6 static int scale3d_is_enabled(void); static void scale3d_enable(int enable); @@ -48,54 +68,73 @@ static void scale3d_enable(int enable); #define POW2(x) ((x) * (x)) /* + * 3D clock scaling should be treated differently when camera is on in AP37. + * 3D in AP37 requires 1.3V and combining it with MPE reaches to EDP limit. + * 3D clock really needs to be set to lower frequency which requires 1.0V. + * The same thing applies to 3D EMC clock. + */ +#define CAMERA_3D_CLK 300000000 +#define CAMERA_3D_EMC_CLK 437000000 + +/* * debugfs parameters to control 3d clock scaling test * - * period - time period for clock rate evaluation - * fast_response - time period for evaluation of 'busy' spikes - * idle_min - if less than [idle_min] percent idle over [fast_response] - * microseconds, clock up. - * idle_max - if over [idle_max] percent idle over [period] microseconds, - * clock down. + * estimation_window - time period for clock rate evaluation + * idle_min - if less than [idle_min / 10] percent idle over + * [estimation_window] microseconds, clock up. + * idle_max - if over [idle_max] percent idle over [estimation_window] + * microseconds, clock down. * max_scale - limits rate changes to no less than (100 - max_scale)% or * (100 + 2 * max_scale)% of current clock rate - * verbosity - set above 5 for debug printouts + * verbosity - bit flag to control debug printouts: + * 1 - stats + * 2 - busy + * 3 - idle + * 4 - hints + * 5 - target frequencies */ struct scale3d_info_rec { struct mutex lock; /* lock for timestamps etc */ int enable; int init; - ktime_t idle_frame; - ktime_t fast_frame; - ktime_t last_idle; - ktime_t last_short_term_idle; + ktime_t last_scale; int is_idle; - ktime_t last_tweak; - ktime_t last_down; + ktime_t last_adjust; int fast_up_count; int slow_down_count; int is_scaled; - int fast_responses; - unsigned long idle_total; - unsigned long idle_short_term_total; - unsigned long max_rate_3d; long emc_slope; long emc_offset; long emc_dip_slope; long emc_dip_offset; long emc_xmid; + unsigned long max_rate_3d; unsigned long min_rate_3d; + ktime_t last_throughput_hint; + struct work_struct work; struct delayed_work idle_timer; + + ktime_t last_estimation_window; + long last_total_idle; + long total_idle; + ktime_t estimation_window; + ktime_t last_notification; + long idle_estimate; + unsigned int scale; - unsigned int p_period; - unsigned int period; + unsigned int p_busy_cutoff; + unsigned int p_estimation_window; + unsigned int p_use_throughput_hint; + unsigned int p_throughput_lo_limit; + unsigned int p_throughput_lower_limit; + unsigned int p_throughput_hi_limit; + unsigned int p_scale_step; unsigned int p_idle_min; unsigned int idle_min; unsigned int p_idle_max; unsigned int idle_max; - unsigned int p_fast_response; - unsigned int fast_response; unsigned int p_adjust; unsigned int p_scale_emc; unsigned int p_emc_dip; @@ -103,13 +142,15 @@ struct scale3d_info_rec { struct clk *clk_3d; struct clk *clk_3d2; struct clk *clk_3d_emc; + int *freqlist; + int freq_count; }; static struct scale3d_info_rec scale3d; -static void scale3d_clocks(unsigned long percent) +static void scale_to_freq(unsigned long hz) { - unsigned long hz, curr; + unsigned long curr; if (!tegra_is_clk_enabled(scale3d.clk_3d)) return; @@ -119,7 +160,8 @@ static void scale3d_clocks(unsigned long percent) return; curr = clk_get_rate(scale3d.clk_3d); - hz = percent * (curr / 100); + if (hz == curr) + return; if (!(hz >= scale3d.max_rate_3d && curr == scale3d.max_rate_3d)) { if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) @@ -139,6 +181,16 @@ static void scale3d_clocks(unsigned long percent) } } +static void scale3d_clocks(unsigned long percent) +{ + unsigned long hz, curr; + + curr = clk_get_rate(scale3d.clk_3d); + hz = percent * (curr / 100); + + scale_to_freq(hz); +} + static void scale3d_clocks_handler(struct work_struct *work) { unsigned int scale; @@ -164,12 +216,26 @@ void nvhost_scale3d_suspend(struct nvhost_device *dev) static void reset_3d_clocks(void) { if (clk_get_rate(scale3d.clk_3d) != scale3d.max_rate_3d) { - clk_set_rate(scale3d.clk_3d, scale3d.max_rate_3d); - if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) - clk_set_rate(scale3d.clk_3d2, scale3d.max_rate_3d); - if (scale3d.p_scale_emc) - clk_set_rate(scale3d.clk_3d_emc, - clk_round_rate(scale3d.clk_3d_emc, UINT_MAX)); + if (is_tegra_camera_on()) + clk_set_rate(scale3d.clk_3d, CAMERA_3D_CLK); + else + clk_set_rate(scale3d.clk_3d, scale3d.max_rate_3d); + if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) { + if (is_tegra_camera_on()) + clk_set_rate(scale3d.clk_3d2, CAMERA_3D_CLK); + else + clk_set_rate(scale3d.clk_3d2, + scale3d.max_rate_3d); + } + if (scale3d.p_scale_emc) { + if (is_tegra_camera_on()) + clk_set_rate(scale3d.clk_3d_emc, + CAMERA_3D_EMC_CLK); + else + clk_set_rate(scale3d.clk_3d_emc, + clk_round_rate(scale3d.clk_3d_emc, + UINT_MAX)); + } } } @@ -207,15 +273,6 @@ static void scale3d_enable(int enable) reset_3d_clocks(); } -static void reset_scaling_counters(ktime_t time) -{ - scale3d.idle_total = 0; - scale3d.idle_short_term_total = 0; - scale3d.last_idle = time; - scale3d.last_short_term_idle = time; - scale3d.idle_frame = time; -} - /* scaling_adjust - use scale up / scale down hint counts to adjust scaling * parameters. * @@ -228,8 +285,6 @@ static void reset_scaling_counters(ktime_t time) * * the parameters adjusted are * - * * fast_response time - * * period - time for scaling down estimate * * idle_min percentage * * idle_max percentage */ @@ -242,13 +297,11 @@ static void reset_scaling_counters(ktime_t time) static void scaling_adjust(ktime_t time) { long hint_ratio; - long fast_response_adjustment; - long period_adjustment; int idle_min_adjustment; int idle_max_adjustment; unsigned long dt; - dt = (unsigned long) ktime_us_delta(time, scale3d.last_tweak); + dt = (unsigned long) ktime_us_delta(time, scale3d.last_adjust); if (dt < SCALING_ADJUST_PERIOD) return; @@ -256,13 +309,9 @@ static void scaling_adjust(ktime_t time) (scale3d.slow_down_count + 1); if (hint_ratio > HINT_RATIO_MAX) { - fast_response_adjustment = -((int) scale3d.p_fast_response) / 4; - period_adjustment = scale3d.p_period / 2; idle_min_adjustment = scale3d.p_idle_min; idle_max_adjustment = scale3d.p_idle_max; } else if (hint_ratio < HINT_RATIO_MIN) { - fast_response_adjustment = scale3d.p_fast_response / 2; - period_adjustment = -((int) scale3d.p_period) / 4; idle_min_adjustment = -((int) scale3d.p_idle_min) / 2; idle_max_adjustment = -((int) scale3d.p_idle_max) / 2; } else { @@ -277,33 +326,23 @@ static void scaling_adjust(ktime_t time) diff *= 2; } - fast_response_adjustment = diff * - (scale3d.p_fast_response / (HINT_RATIO_DIFF * 2)); - period_adjustment = - diff * (scale3d.p_period / HINT_RATIO_DIFF); idle_min_adjustment = (factor * (int) scale3d.p_idle_min) / HINT_RATIO_DIFF; idle_max_adjustment = (factor * (int) scale3d.p_idle_max) / HINT_RATIO_DIFF; } - scale3d.fast_response = - scale3d.p_fast_response + fast_response_adjustment; - scale3d.period = scale3d.p_period + period_adjustment; - scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment; + scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment; scale3d.idle_max = scale3d.p_idle_max + idle_max_adjustment; - if (scale3d.p_verbosity >= 10) - pr_info("scale3d stats: + %d - %d (/ %d) f %u p %u min %u max %u\n", + if (scale3d.p_verbosity & GR3D_PRINT_STATS) + pr_info("scale3d stats: + %d - %d min %u max %u\n", scale3d.fast_up_count, scale3d.slow_down_count, - scale3d.fast_responses, scale3d.fast_response, - scale3d.period, scale3d.idle_min, scale3d.idle_max); + scale3d.idle_min, scale3d.idle_max); scale3d.fast_up_count = 0; scale3d.slow_down_count = 0; - scale3d.fast_responses = 0; - scale3d.last_down = time; - scale3d.last_tweak = time; + scale3d.last_adjust = time; } #undef SCALING_ADJUST_PERIOD @@ -316,61 +355,101 @@ static void scaling_state_check(ktime_t time) { unsigned long dt; - /* adjustment: set scale parameters (fast_response, period) +/- 25% + /* adjustment: set scale parameters (idle_min, idle_max) +/- 25% * based on ratio of scale up to scale down hints */ if (scale3d.p_adjust) scaling_adjust(time); else { - scale3d.fast_response = scale3d.p_fast_response; - scale3d.period = scale3d.p_period; scale3d.idle_min = scale3d.p_idle_min; scale3d.idle_max = scale3d.p_idle_max; } - /* check for load peaks */ - dt = (unsigned long) ktime_us_delta(time, scale3d.fast_frame); - if (dt > scale3d.fast_response) { - unsigned long idleness = - (scale3d.idle_short_term_total * 100) / dt; - scale3d.fast_responses++; - scale3d.fast_frame = time; - /* if too busy, scale up */ - if (idleness < scale3d.idle_min) { - scale3d.is_scaled = 0; - scale3d.fast_up_count++; - if (scale3d.p_verbosity >= 5) - pr_info("scale3d: %ld%% busy\n", - 100 - idleness); - - reset_3d_clocks(); - reset_scaling_counters(time); - return; - } - scale3d.idle_short_term_total = 0; - scale3d.last_short_term_idle = time; + dt = (unsigned long) ktime_us_delta(time, scale3d.last_scale); + if (dt < scale3d.p_estimation_window) + return; + + scale3d.last_scale = time; + + /* if too busy, scale up */ + if (scale3d.idle_estimate < scale3d.idle_min) { + scale3d.is_scaled = 0; + scale3d.fast_up_count++; + if (scale3d.p_verbosity & GR3D_PRINT_BUSY) + pr_info("scale3d: %ld/1000 busy\n", + 1000 - scale3d.idle_estimate); + + reset_3d_clocks(); + return; } - dt = (unsigned long) ktime_us_delta(time, scale3d.idle_frame); - if (dt > scale3d.period) { - unsigned long idleness = (scale3d.idle_total * 100) / dt; + if (scale3d.p_verbosity & GR3D_PRINT_IDLE) + pr_info("scale3d: idle %lu/1000\n", + scale3d.idle_estimate); - if (scale3d.p_verbosity >= 5) - pr_info("scale3d: idle %lu, ~%lu%%\n", - scale3d.idle_total, idleness); + if (scale3d.idle_estimate > scale3d.idle_max) { + if (!scale3d.is_scaled) + scale3d.is_scaled = 1; - if (idleness > scale3d.idle_max) { - if (!scale3d.is_scaled) { - scale3d.is_scaled = 1; - scale3d.last_down = time; - } - scale3d.slow_down_count++; - /* if idle time is high, clock down */ - scale3d.scale = 100 - (idleness - scale3d.idle_min); - schedule_work(&scale3d.work); - } + scale3d.slow_down_count++; + /* if idle time is high, clock down */ + scale3d.scale = + 100 - (scale3d.idle_estimate - scale3d.idle_min) / 10; + schedule_work(&scale3d.work); + } +} + +/* the idle estimate is done by keeping 2 time stamps, initially set to the + * same time. Once the estimation_window time has been exceeded, one time + * stamp is moved up to the current time. The idle estimate is calculated + * based on the idle time percentage from the earlier estimate. The next time + * an estimation_window time is exceeded, the previous idle time and estimates + * are moved up - this is intended to prevent abrupt changes to the idle + * estimate. + */ +static void update_load_estimate(int idle) +{ + unsigned long window; + unsigned long t; + + ktime_t now = ktime_get(); + t = ktime_us_delta(now, scale3d.last_notification); - reset_scaling_counters(time); + /* if the last event was over GR3D_TIMEFRAME usec ago (1 sec), the + * current load tracking data is probably stale + */ + if (t > GR3D_TIMEFRAME) { + scale3d.is_idle = idle; + scale3d.last_notification = now; + scale3d.estimation_window = now; + scale3d.last_estimation_window = now; + scale3d.total_idle = 0; + scale3d.last_total_idle = 0; + scale3d.idle_estimate = idle ? 1000 : 0; + return; + } + + if (scale3d.is_idle) { + scale3d.total_idle += t; + scale3d.last_total_idle += t; + } + + scale3d.is_idle = idle; + scale3d.last_notification = now; + + window = ktime_us_delta(now, scale3d.last_estimation_window); + /* prevent division by 0 if events come in less than 1 usec apart */ + if (window > 0) + scale3d.idle_estimate = + (1000 * scale3d.last_total_idle) / window; + + /* move up to the last estimation window */ + if (ktime_us_delta(now, scale3d.estimation_window) > + scale3d.p_estimation_window) { + scale3d.last_estimation_window = scale3d.estimation_window; + scale3d.last_total_idle = scale3d.total_idle; + scale3d.total_idle = 0; + scale3d.estimation_window = now; } } @@ -378,65 +457,226 @@ void nvhost_scale3d_notify_idle(struct nvhost_device *dev) { ktime_t t; unsigned long dt; + int delay; if (!scale3d.enable) return; - mutex_lock(&scale3d.lock); + update_load_estimate(1); t = ktime_get(); - if (scale3d.is_idle) { - dt = ktime_us_delta(t, scale3d.last_idle); - scale3d.idle_total += dt; - dt = ktime_us_delta(t, scale3d.last_short_term_idle); - scale3d.idle_short_term_total += dt; - } else - scale3d.is_idle = 1; + /* if throughput hint enabled, and last hint is recent enough, return */ + if (scale3d.p_use_throughput_hint) { + dt = ktime_us_delta(t, scale3d.last_throughput_hint); + if (dt < GR3D_TIMEFRAME) + return; + } - scale3d.last_idle = t; - scale3d.last_short_term_idle = t; + mutex_lock(&scale3d.lock); - scaling_state_check(scale3d.last_idle); + scaling_state_check(t); - /* delay idle_max % of 2 * fast_response time (given in microseconds) */ - schedule_delayed_work(&scale3d.idle_timer, - msecs_to_jiffies((scale3d.idle_max * scale3d.fast_response) - / 50000)); + /* delay idle_max % of 2 * estimation_window (given in microseconds) */ + delay = (scale3d.idle_max * scale3d.p_estimation_window) / 500000; + schedule_delayed_work(&scale3d.idle_timer, msecs_to_jiffies(delay)); mutex_unlock(&scale3d.lock); } void nvhost_scale3d_notify_busy(struct nvhost_device *dev) { - unsigned long idle; - unsigned long short_term_idle; ktime_t t; if (!scale3d.enable) return; - mutex_lock(&scale3d.lock); - - cancel_delayed_work(&scale3d.idle_timer); + update_load_estimate(0); t = ktime_get(); - if (scale3d.is_idle) { - idle = (unsigned long) - ktime_us_delta(t, scale3d.last_idle); - scale3d.idle_total += idle; - short_term_idle = - ktime_us_delta(t, scale3d.last_short_term_idle); - scale3d.idle_short_term_total += short_term_idle; - scale3d.is_idle = 0; + /* if throughput hint enabled, and last hint is recent enough, return */ + if (scale3d.p_use_throughput_hint) { + unsigned long dt; + dt = ktime_us_delta(t, scale3d.last_throughput_hint); + if (dt < GR3D_TIMEFRAME) + return; } + mutex_lock(&scale3d.lock); + + cancel_delayed_work(&scale3d.idle_timer); scaling_state_check(t); mutex_unlock(&scale3d.lock); } +struct score { + int size; /* number of elements */ + int pos; /* position in ring buffer */ + int count; /* actual item count */ + unsigned int sum; /* running sum */ + unsigned int prev; /* previous score after 'reset' operation */ + unsigned int list[]; /* ring buffer */ +}; + +static struct score *score_init(int capacity) +{ + struct score *s; + + s = kzalloc(sizeof(struct score) + capacity * sizeof(int), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->size = capacity; + + return s; +} + +static void score_delete(struct score *s) +{ + kfree(s); +} + +#define score_get_average(s) ((s)->count ? (s)->sum / (s)->count : 0) + +static void score_add(struct score *s, unsigned int reading) +{ + if (s->count < s->size) { + s->sum += reading; + s->count++; + } else + s->sum = s->sum - s->list[s->pos] + reading; + + s->list[s->pos] = reading; + s->pos = (s->pos + 1) % s->size; +} + + +static unsigned int score_reset(struct score *s) +{ + s->prev = s->sum; + + s->count = 0; + s->pos = 0; + s->sum = 0; + + return s->prev; +} + +int freqlist_up(long target, int steps) +{ + int i, pos; + + for (i = 0; i < scale3d.freq_count; i++) + if (scale3d.freqlist[i] >= target) + break; + + pos = min(scale3d.freq_count - 1, i + steps); + return scale3d.freqlist[pos]; +} + +int freqlist_down(long target, int steps) +{ + int i, pos; + + for (i = scale3d.freq_count - 1; i >= 0; i--) + if (scale3d.freqlist[i] <= target) + break; + + pos = max(0, i - steps); + return scale3d.freqlist[pos]; +} + +static struct score *busy_history; +static struct score *hint_history; + +/* When a throughput hint is given, perform scaling based on the hint and on + * the current idle estimation. This is done as follows: + * + * 1. On moderate loads force min frequency if the throughput hint is not too + * low. + * 2. Otherwise, calculate target-rate = max-rate * load-percentage + * 3. Unless the current or average throughput hint is below the minimum + * limit, in which case, choose a higher rate + * 4. Or the average throughput hint is above the maximum limit, in which case, + * choose a lower rate. + */ +void nvhost_scale3d_set_throughput_hint(int hint) +{ + ktime_t now; + long busy; + long curr; + long target; + long dt; + int avg_busy, avg_hint; + + if (!scale3d.enable) + return; + + if (!scale3d.p_use_throughput_hint) + return; + + if (scale3d.p_verbosity & GR3D_PRINT_HINT) + pr_info("3fds: idle %ld, hint %d\n", + scale3d.idle_estimate, hint); + + now = ktime_get(); + dt = ktime_us_delta(now, scale3d.last_throughput_hint); + if (dt > GR3D_TIMEFRAME) { + score_reset(busy_history); + score_reset(hint_history); + } + + scale3d.last_throughput_hint = now; + + busy = 1000 - scale3d.idle_estimate; + curr = clk_get_rate(scale3d.clk_3d); + target = scale3d.min_rate_3d; + + score_add(busy_history, busy); + score_add(hint_history, hint); + + avg_busy = score_get_average(busy_history); + avg_hint = score_get_average(hint_history); + + if (busy > 0) + target = (curr / 1000) * busy; + + /* In practice, running the gpu at min frequency is typically + * sufficient to keep up performance at loads up to 70% on cases, + * but the average hint value is tested to keep performance up if + * needed. + */ + if (avg_busy <= scale3d.p_busy_cutoff && + avg_hint >= scale3d.p_throughput_lower_limit) + target = scale3d.min_rate_3d; + else { + target = (scale3d.max_rate_3d / 1000) * avg_busy; + + /* Scale up if either the current hint or the running average + * are below the target to prevent performance drop. + */ + if (hint <= scale3d.p_throughput_lo_limit || + avg_hint <= scale3d.p_throughput_lo_limit) { + if (target < curr) + target = curr; + target = freqlist_up(target, scale3d.p_scale_step); + } else if (avg_hint >= scale3d.p_throughput_hi_limit) { + if (target > curr) + target = curr; + target = freqlist_down(target, scale3d.p_scale_step); + } + } + + scale_to_freq(target); + + if (scale3d.p_verbosity & GR3D_PRINT_TARGET) + pr_info("3dfs: busy %ld <%d>, curr %ld, t %ld, hint %d <%d>\n", + busy, avg_busy, curr / 1000000, target, hint, avg_hint); +} +EXPORT_SYMBOL(nvhost_scale3d_set_throughput_hint); + static void scale3d_idle_handler(struct work_struct *work) { int notify_idle = 0; @@ -458,19 +698,6 @@ static void scale3d_idle_handler(struct work_struct *work) nvhost_scale3d_notify_idle(NULL); } -void nvhost_scale3d_reset() -{ - ktime_t t; - - if (!scale3d.enable) - return; - - t = ktime_get(); - mutex_lock(&scale3d.lock); - reset_scaling_counters(t); - mutex_unlock(&scale3d.lock); -} - /* * debugfs parameters to control 3d clock scaling */ @@ -495,13 +722,17 @@ void nvhost_scale3d_debug_init(struct dentry *de) } \ } while (0) - CREATE_SCALE3D_FILE(fast_response); + CREATE_SCALE3D_FILE(estimation_window); CREATE_SCALE3D_FILE(idle_min); CREATE_SCALE3D_FILE(idle_max); - CREATE_SCALE3D_FILE(period); CREATE_SCALE3D_FILE(adjust); CREATE_SCALE3D_FILE(scale_emc); CREATE_SCALE3D_FILE(emc_dip); + CREATE_SCALE3D_FILE(use_throughput_hint); + CREATE_SCALE3D_FILE(throughput_hi_limit); + CREATE_SCALE3D_FILE(throughput_lo_limit); + CREATE_SCALE3D_FILE(throughput_lower_limit); + CREATE_SCALE3D_FILE(scale_step); CREATE_SCALE3D_FILE(verbosity); #undef CREATE_SCALE3D_FILE } @@ -532,12 +763,17 @@ static ssize_t enable_3d_scaling_store(struct device *dev, static DEVICE_ATTR(enable_3d_scaling, S_IRUGO | S_IWUSR, enable_3d_scaling_show, enable_3d_scaling_store); +#define MAX_FREQ_COUNT 0x40 /* 64 frequencies should be enough for anyone */ + void nvhost_scale3d_init(struct nvhost_device *d) { if (!scale3d.init) { int error; unsigned long max_emc, min_emc; long correction; + long rate; + int freqs[MAX_FREQ_COUNT]; + mutex_init(&scale3d.lock); INIT_WORK(&scale3d.work, scale3d_clocks_handler); @@ -632,30 +868,74 @@ void nvhost_scale3d_init(struct nvhost_device *d) POW2(scale3d.max_rate_3d - scale3d.emc_xmid); scale3d.emc_dip_offset -= correction; + scale3d.is_idle = 1; + /* set scaling parameter defaults */ scale3d.enable = 1; - scale3d.period = scale3d.p_period = 100000; - scale3d.idle_min = scale3d.p_idle_min = 10; - scale3d.idle_max = scale3d.p_idle_max = 15; - scale3d.fast_response = scale3d.p_fast_response = 7000; + scale3d.idle_min = scale3d.p_idle_min = 100; + scale3d.idle_max = scale3d.p_idle_max = 150; scale3d.p_scale_emc = 1; scale3d.p_emc_dip = 1; scale3d.p_verbosity = 0; scale3d.p_adjust = 1; + scale3d.p_use_throughput_hint = 1; + scale3d.p_throughput_lower_limit = 940; + scale3d.p_throughput_lo_limit = 990; + scale3d.p_throughput_hi_limit = 1010; + scale3d.p_scale_step = 1; + scale3d.p_estimation_window = 8000; + scale3d.p_busy_cutoff = 750; error = device_create_file(&d->dev, &dev_attr_enable_3d_scaling); if (error) dev_err(&d->dev, "failed to create sysfs attributes"); + rate = 0; + scale3d.freq_count = 0; + while (rate <= scale3d.max_rate_3d) { + long rounded_rate; + if (unlikely(scale3d.freq_count == MAX_FREQ_COUNT)) { + pr_err("%s: too many frequencies\n", __func__); + break; + } + rounded_rate = + clk_round_rate(scale3d.clk_3d, rate); + freqs[scale3d.freq_count++] = rounded_rate; + rate = rounded_rate + 2000; + } + scale3d.freqlist = + kmalloc(scale3d.freq_count * sizeof(int), GFP_KERNEL); + if (scale3d.freqlist == NULL) + pr_err("%s: can\'t allocate freq table\n", __func__); + + memcpy(scale3d.freqlist, freqs, + scale3d.freq_count * sizeof(int)); + + busy_history = score_init(GR3D_FRAME_SPAN); + if (busy_history == NULL) + pr_err("%s: can\'t init load tracking array\n", + __func__); + + hint_history = score_init(GR3D_FRAME_SPAN); + if (hint_history == NULL) + pr_err("%s: can\'t init throughput tracking array\n", + __func__); + scale3d.init = 1; } - - nvhost_scale3d_reset(); } void nvhost_scale3d_deinit(struct nvhost_device *dev) { device_remove_file(&dev->dev, &dev_attr_enable_3d_scaling); scale3d.init = 0; + if (scale3d.freqlist != NULL) { + kfree(scale3d.freqlist); + scale3d.freq_count = 0; + scale3d.freqlist = NULL; + } + + score_delete(busy_history); + score_delete(hint_history); } diff --git a/drivers/video/tegra/host/host1x/host1x.c b/drivers/video/tegra/host/host1x/host1x.c index 33ebc1ff5d22..31899c78065b 100644 --- a/drivers/video/tegra/host/host1x/host1x.c +++ b/drivers/video/tegra/host/host1x/host1x.c @@ -308,6 +308,19 @@ static int power_off_host(struct nvhost_device *dev) return 0; } +static void clock_on_host(struct nvhost_device *dev) +{ + struct nvhost_master *host = nvhost_get_drvdata(dev); + nvhost_intr_start(&host->intr, clk_get_rate(dev->clk[0])); +} + +static int clock_off_host(struct nvhost_device *dev) +{ + struct nvhost_master *host = nvhost_get_drvdata(dev); + nvhost_intr_stop(&host->intr); + return 0; +} + static int __devinit nvhost_user_init(struct nvhost_master *host) { int err, devno; @@ -516,6 +529,8 @@ static struct nvhost_driver nvhost_driver = { }, .finalize_poweron = power_on_host, .prepare_poweroff = power_off_host, + .finalize_clockon = clock_on_host, + .prepare_clockoff = clock_off_host, }; static int __init nvhost_mod_init(void) diff --git a/drivers/video/tegra/host/host1x/host1x_cdma.c b/drivers/video/tegra/host/host1x/host1x_cdma.c index 2e7ff5783a37..5a29ff652efe 100644 --- a/drivers/video/tegra/host/host1x/host1x_cdma.c +++ b/drivers/video/tegra/host/host1x/host1x_cdma.c @@ -233,12 +233,15 @@ static void cdma_timeout_cpu_incr(struct nvhost_cdma *cdma, u32 getptr, /* after CPU incr, ensure shadow is up to date */ nvhost_syncpt_update_min(&dev->syncpt, cdma->timeout.syncpt_id); - /* update WAITBASE_3D by same number of incrs */ - if (waitbases) { + /* Synchronize wait bases. 2D wait bases are synchronized with + * syncpoint 19. Hence wait bases are not updated when syncptid=18. */ + + if (cdma->timeout.syncpt_id != NVSYNCPT_2D_0 && waitbases) { void __iomem *p; p = dev->sync_aperture + host1x_sync_syncpt_base_0_r() + - (ffs(waitbases) * sizeof(u32)); + (__ffs(waitbases) * sizeof(u32)); writel(syncval, p); + dev->syncpt.base_val[__ffs(waitbases)] = syncval; } /* NOP all the PB slots */ @@ -486,7 +489,7 @@ static void cdma_timeout_handler(struct work_struct *work) /* stop HW, resetting channel/module */ cdma_op().timeout_teardown_begin(cdma); - nvhost_cdma_update_sync_queue(cdma, sp, dev->dev); + nvhost_cdma_update_sync_queue(cdma, sp, ch->dev); mutex_unlock(&cdma->lock); } diff --git a/drivers/video/tegra/host/host1x/host1x_channel.c b/drivers/video/tegra/host/host1x/host1x_channel.c index 9e9fc25dc966..0274413ff698 100644 --- a/drivers/video/tegra/host/host1x/host1x_channel.c +++ b/drivers/video/tegra/host/host1x/host1x_channel.c @@ -365,7 +365,7 @@ static int host1x_channel_read_3d_reg( if (hwctx_to_save) { syncpt_incrs += hwctx_to_save->save_incrs; hwctx_to_save->hwctx.valid = true; - channel->ctxhandler->get(&hwctx_to_save->hwctx); + nvhost_job_get_hwctx(job, &hwctx_to_save->hwctx); } channel->cur_ctx = hwctx; if (channel->cur_ctx && channel->cur_ctx->valid) { @@ -470,7 +470,8 @@ static int host1x_channel_read_3d_reg( wait_event(wq, nvhost_syncpt_is_expired(&nvhost_get_host(channel->dev)->syncpt, p->syncpt, syncval - 2)); - nvhost_intr_put_ref(&nvhost_get_host(channel->dev)->intr, ref); + nvhost_intr_put_ref(&nvhost_get_host(channel->dev)->intr, p->syncpt, + ref); /* Read the register value from FIFO */ err = host1x_drain_read_fifo(channel, value, 1, &pending); @@ -580,7 +581,6 @@ static int host1x_save_context(struct nvhost_channel *ch) } hwctx_to_save->valid = true; - ch->ctxhandler->get(hwctx_to_save); ch->cur_ctx = NULL; syncpt_id = to_host1x_hwctx_handler(hwctx_to_save->h)->syncpt; @@ -623,7 +623,7 @@ static int host1x_save_context(struct nvhost_channel *ch) nvhost_syncpt_is_expired(&nvhost_get_host(ch->dev)->syncpt, syncpt_id, syncpt_val)); - nvhost_intr_put_ref(&nvhost_get_host(ch->dev)->intr, ref); + nvhost_intr_put_ref(&nvhost_get_host(ch->dev)->intr, syncpt_id, ref); nvhost_cdma_update(&ch->cdma); diff --git a/drivers/video/tegra/host/host1x/host1x_intr.c b/drivers/video/tegra/host/host1x/host1x_intr.c index 62fd07cbb9ba..facb818a0c24 100644 --- a/drivers/video/tegra/host/host1x/host1x_intr.c +++ b/drivers/video/tegra/host/host1x/host1x_intr.c @@ -131,6 +131,16 @@ static void t20_intr_enable_syncpt_intr(struct nvhost_intr *intr, u32 id) BIT_WORD(id) * REGISTER_STRIDE); } +static void t20_intr_disable_syncpt_intr(struct nvhost_intr *intr, u32 id) +{ + struct nvhost_master *dev = intr_to_dev(intr); + void __iomem *sync_regs = dev->sync_aperture; + + writel(BIT_MASK(id), sync_regs + + host1x_sync_syncpt_thresh_int_disable_r() + + BIT_WORD(id) * REGISTER_STRIDE); +} + static void t20_intr_disable_all_syncpt_intrs(struct nvhost_intr *intr) { struct nvhost_master *dev = intr_to_dev(intr); @@ -140,7 +150,7 @@ static void t20_intr_disable_all_syncpt_intrs(struct nvhost_intr *intr) for (reg = 0; reg <= BIT_WORD(dev->info.nb_pts) * REGISTER_STRIDE; reg += REGISTER_STRIDE) { /* disable interrupts for both cpu's */ - writel(0, sync_regs + + writel(0xffffffffu, sync_regs + host1x_sync_syncpt_thresh_int_disable_r() + reg); @@ -276,6 +286,7 @@ static const struct nvhost_intr_ops host1x_intr_ops = { .set_host_clocks_per_usec = t20_intr_set_host_clocks_per_usec, .set_syncpt_threshold = t20_intr_set_syncpt_threshold, .enable_syncpt_intr = t20_intr_enable_syncpt_intr, + .disable_syncpt_intr = t20_intr_disable_syncpt_intr, .disable_all_syncpt_intrs = t20_intr_disable_all_syncpt_intrs, .request_host_general_irq = t20_intr_request_host_general_irq, .free_host_general_irq = t20_intr_free_host_general_irq, diff --git a/drivers/video/tegra/host/mpe/mpe.c b/drivers/video/tegra/host/mpe/mpe.c index c738700469c6..d76ee0108eef 100644 --- a/drivers/video/tegra/host/mpe/mpe.c +++ b/drivers/video/tegra/host/mpe/mpe.c @@ -212,7 +212,7 @@ struct save_info { unsigned int restore_count; }; -static void __init save_begin(struct host1x_hwctx_handler *h, u32 *ptr) +static void save_begin(struct host1x_hwctx_handler *h, u32 *ptr) { /* MPE: when done, increment syncpt to base+1 */ ptr[0] = nvhost_opcode_setclass(NV_VIDEO_ENCODE_MPEG_CLASS_ID, 0, 0); @@ -229,7 +229,7 @@ static void __init save_begin(struct host1x_hwctx_handler *h, u32 *ptr) } #define SAVE_BEGIN_SIZE 5 -static void __init save_direct(u32 *ptr, u32 start_reg, u32 count) +static void save_direct(u32 *ptr, u32 start_reg, u32 count) { ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, host1x_uclass_indoff_r(), 1); @@ -239,7 +239,7 @@ static void __init save_direct(u32 *ptr, u32 start_reg, u32 count) } #define SAVE_DIRECT_SIZE 3 -static void __init save_set_ram_cmd(u32 *ptr, u32 cmd_reg, u32 count) +static void save_set_ram_cmd(u32 *ptr, u32 cmd_reg, u32 count) { ptr[0] = nvhost_opcode_setclass(NV_VIDEO_ENCODE_MPEG_CLASS_ID, cmd_reg, 1); @@ -247,7 +247,7 @@ static void __init save_set_ram_cmd(u32 *ptr, u32 cmd_reg, u32 count) } #define SAVE_SET_RAM_CMD_SIZE 2 -static void __init save_read_ram_data_nasty(u32 *ptr, u32 data_reg) +static void save_read_ram_data_nasty(u32 *ptr, u32 data_reg) { ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, host1x_uclass_indoff_r(), 1); @@ -261,7 +261,7 @@ static void __init save_read_ram_data_nasty(u32 *ptr, u32 data_reg) } #define SAVE_READ_RAM_DATA_NASTY_SIZE 5 -static void __init save_end(struct host1x_hwctx_handler *h, u32 *ptr) +static void save_end(struct host1x_hwctx_handler *h, u32 *ptr) { /* Wait for context read service to finish (cpu incr 3) */ ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID, @@ -275,7 +275,7 @@ static void __init save_end(struct host1x_hwctx_handler *h, u32 *ptr) } #define SAVE_END_SIZE 5 -static void __init setup_save_regs(struct save_info *info, +static void setup_save_regs(struct save_info *info, const struct hwctx_reginfo *regs, unsigned int nr_regs) { @@ -304,7 +304,7 @@ static void __init setup_save_regs(struct save_info *info, info->restore_count = restore_count; } -static void __init setup_save_ram_nasty(struct save_info *info, unsigned words, +static void setup_save_ram_nasty(struct save_info *info, unsigned words, unsigned cmd_reg, unsigned data_reg) { u32 *ptr = info->ptr; @@ -330,7 +330,7 @@ static void __init setup_save_ram_nasty(struct save_info *info, unsigned words, info->restore_count = restore_count; } -static void __init setup_save(struct host1x_hwctx_handler *h, u32 *ptr) +static void setup_save(struct host1x_hwctx_handler *h, u32 *ptr) { struct save_info info = { ptr, @@ -553,7 +553,7 @@ struct nvhost_hwctx_handler *nvhost_mpe_ctxhandler_init(u32 syncpt, p->save_buf = mem_op().alloc(memmgr, p->save_size * 4, 32, mem_mgr_flag_write_combine); - if (IS_ERR(p->save_buf)) { + if (IS_ERR_OR_NULL(p->save_buf)) { p->save_buf = NULL; return NULL; } diff --git a/drivers/video/tegra/host/nvhost_acm.c b/drivers/video/tegra/host/nvhost_acm.c index 06005c423a21..5bde55ad2ff5 100644 --- a/drivers/video/tegra/host/nvhost_acm.c +++ b/drivers/video/tegra/host/nvhost_acm.c @@ -101,8 +101,17 @@ void nvhost_module_reset(struct nvhost_device *dev) static void to_state_clockgated_locked(struct nvhost_device *dev) { + struct nvhost_driver *drv = to_nvhost_driver(dev->dev.driver); + if (dev->powerstate == NVHOST_POWER_STATE_RUNNING) { - int i; + int i, err; + if (drv->prepare_clockoff) { + err = drv->prepare_clockoff(dev); + if (err) { + dev_err(&dev->dev, "error clock gating"); + return; + } + } for (i = 0; i < dev->num_clks; i++) clk_disable(dev->clk[i]); if (dev->dev.parent) @@ -141,6 +150,14 @@ static void to_state_running_locked(struct nvhost_device *dev) } } + /* Invoke callback after enabling clock. This is used for + * re-enabling host1x interrupts. */ + if (prev_state == NVHOST_POWER_STATE_CLOCKGATED + && drv->finalize_clockon) + drv->finalize_clockon(dev); + + /* Invoke callback after power un-gating. This is used for + * restoring context. */ if (prev_state == NVHOST_POWER_STATE_POWERGATED && drv->finalize_poweron) drv->finalize_poweron(dev); @@ -343,15 +360,17 @@ void nvhost_module_remove_client(struct nvhost_device *dev, void *priv) { int i; struct nvhost_module_client *m; + int found = 0; mutex_lock(&client_list_lock); list_for_each_entry(m, &dev->client_list, node) { if (priv == m->priv) { list_del(&m->node); + found = 1; break; } } - if (m) { + if (found) { kfree(m); for (i = 0; i < dev->num_clks; i++) nvhost_module_update_rate(dev, i); diff --git a/drivers/video/tegra/host/nvhost_intr.c b/drivers/video/tegra/host/nvhost_intr.c index 38a04f151e87..9788d32bd4a9 100644 --- a/drivers/video/tegra/host/nvhost_intr.c +++ b/drivers/video/tegra/host/nvhost_intr.c @@ -210,7 +210,9 @@ static int process_wait_list(struct nvhost_intr *intr, remove_completed_waiters(&syncpt->wait_head, threshold, completed); empty = list_empty(&syncpt->wait_head); - if (!empty) + if (empty) + intr_op().disable_syncpt_intr(intr, syncpt->id); + else reset_threshold_interrupt(intr, &syncpt->wait_head, syncpt->id); @@ -327,14 +329,20 @@ void *nvhost_intr_alloc_waiter() GFP_KERNEL|__GFP_REPEAT); } -void nvhost_intr_put_ref(struct nvhost_intr *intr, void *ref) +void nvhost_intr_put_ref(struct nvhost_intr *intr, u32 id, void *ref) { struct nvhost_waitlist *waiter = ref; + struct nvhost_intr_syncpt *syncpt; + struct nvhost_master *host = intr_to_dev(intr); while (atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED) == WLS_REMOVED) schedule(); + syncpt = intr->syncpt + id; + (void)process_wait_list(intr, syncpt, + nvhost_syncpt_update_min(&host->syncpt, id)); + kref_put(&waiter->refcount, waiter_release); } diff --git a/drivers/video/tegra/host/nvhost_intr.h b/drivers/video/tegra/host/nvhost_intr.h index cf0b6b9e8934..d4a6157eced1 100644 --- a/drivers/video/tegra/host/nvhost_intr.h +++ b/drivers/video/tegra/host/nvhost_intr.h @@ -104,7 +104,7 @@ void *nvhost_intr_alloc_waiter(void); * You must call this if you passed non-NULL as ref. * @ref the ref returned from nvhost_intr_add_action() */ -void nvhost_intr_put_ref(struct nvhost_intr *intr, void *ref); +void nvhost_intr_put_ref(struct nvhost_intr *intr, u32 id, void *ref); int nvhost_intr_init(struct nvhost_intr *intr, u32 irq_gen, u32 irq_sync); void nvhost_intr_deinit(struct nvhost_intr *intr); diff --git a/drivers/video/tegra/host/nvhost_job.c b/drivers/video/tegra/host/nvhost_job.c index f93d7df1a552..f0f7e64d4504 100644 --- a/drivers/video/tegra/host/nvhost_job.c +++ b/drivers/video/tegra/host/nvhost_job.c @@ -34,19 +34,27 @@ /* Magic to use to fill freed handle slots */ #define BAD_MAGIC 0xdeadbeef -static int job_size(struct nvhost_submit_hdr_ext *hdr) +static size_t job_size(struct nvhost_submit_hdr_ext *hdr) { - int num_relocs = hdr ? hdr->num_relocs : 0; - int num_waitchks = hdr ? hdr->num_waitchks : 0; - int num_cmdbufs = hdr ? hdr->num_cmdbufs : 0; - int num_unpins = num_cmdbufs + num_relocs; + s64 num_relocs = hdr ? (int)hdr->num_relocs : 0; + s64 num_waitchks = hdr ? (int)hdr->num_waitchks : 0; + s64 num_cmdbufs = hdr ? (int)hdr->num_cmdbufs : 0; + s64 num_unpins = num_cmdbufs + num_relocs; + s64 total; - return sizeof(struct nvhost_job) + if(num_relocs < 0 || num_waitchks < 0 || num_cmdbufs < 0) + return 0; + + total = sizeof(struct nvhost_job) + num_relocs * sizeof(struct nvhost_reloc) + num_relocs * sizeof(struct nvhost_reloc_shift) + num_unpins * sizeof(struct mem_handle *) + num_waitchks * sizeof(struct nvhost_waitchk) + num_cmdbufs * sizeof(struct nvhost_job_gather); + + if(total > ULONG_MAX) + return 0; + return (size_t)total; } static void init_fields(struct nvhost_job *job, @@ -63,7 +71,11 @@ static void init_fields(struct nvhost_job *job, job->priority = priority; job->clientid = clientid; - /* Redistribute memory to the structs */ + /* + * Redistribute memory to the structs. + * Overflows and negative conditions have + * already been checked in job_alloc(). + */ mem += sizeof(struct nvhost_job); job->relocarray = num_relocs ? mem : NULL; mem += num_relocs * sizeof(struct nvhost_reloc); @@ -91,8 +103,11 @@ struct nvhost_job *nvhost_job_alloc(struct nvhost_channel *ch, int clientid) { struct nvhost_job *job = NULL; + size_t size = job_size(hdr); - job = vzalloc(job_size(hdr)); + if(!size) + goto error; + job = vzalloc(size); if (!job) goto error; diff --git a/drivers/video/tegra/host/nvhost_syncpt.c b/drivers/video/tegra/host/nvhost_syncpt.c index 9fa7d0652c1f..38c28ca116e7 100644 --- a/drivers/video/tegra/host/nvhost_syncpt.c +++ b/drivers/video/tegra/host/nvhost_syncpt.c @@ -235,7 +235,7 @@ int nvhost_syncpt_wait_timeout(struct nvhost_syncpt *sp, u32 id, check_count++; } } - nvhost_intr_put_ref(&(syncpt_to_dev(sp)->intr), ref); + nvhost_intr_put_ref(&(syncpt_to_dev(sp)->intr), id, ref); done: nvhost_module_idle(syncpt_to_dev(sp)->dev); @@ -344,7 +344,7 @@ static ssize_t syncpt_min_show(struct kobject *kobj, struct nvhost_syncpt_attr *syncpt_attr = container_of(attr, struct nvhost_syncpt_attr, attr); - return snprintf(buf, PAGE_SIZE, "%d", + return snprintf(buf, PAGE_SIZE, "%u", nvhost_syncpt_read(&syncpt_attr->host->syncpt, syncpt_attr->id)); } @@ -355,7 +355,7 @@ static ssize_t syncpt_max_show(struct kobject *kobj, struct nvhost_syncpt_attr *syncpt_attr = container_of(attr, struct nvhost_syncpt_attr, attr); - return snprintf(buf, PAGE_SIZE, "%d", + return snprintf(buf, PAGE_SIZE, "%u", nvhost_syncpt_read_max(&syncpt_attr->host->syncpt, syncpt_attr->id)); } diff --git a/drivers/video/tegra/host/t30/t30.c b/drivers/video/tegra/host/t30/t30.c index 0c8d626a4d67..334d598d5c0b 100644 --- a/drivers/video/tegra/host/t30/t30.c +++ b/drivers/video/tegra/host/t30/t30.c @@ -142,7 +142,7 @@ static struct nvhost_device tegra_gr2d02_device = { .waitbases = BIT(NVWAITBASE_2D_0) | BIT(NVWAITBASE_2D_1), .modulemutexes = BIT(NVMODMUTEX_2D_FULL) | BIT(NVMODMUTEX_2D_SIMPLE) | BIT(NVMODMUTEX_2D_SB_A) | BIT(NVMODMUTEX_2D_SB_B), - .clocks = { {"gr2d", UINT_MAX}, + .clocks = { {"gr2d", 0}, {"epp", 0}, {"emc", 300000000} }, NVHOST_MODULE_NO_POWERGATE_IDS, |