summaryrefslogtreecommitdiff
path: root/drivers/video/tegra/host
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/video/tegra/host')
-rw-r--r--drivers/video/tegra/host/bus.c2
-rw-r--r--drivers/video/tegra/host/bus_client.c3
-rw-r--r--drivers/video/tegra/host/chip_support.h1
-rw-r--r--drivers/video/tegra/host/gr3d/gr3d.c4
-rw-r--r--drivers/video/tegra/host/gr3d/gr3d.h3
-rw-r--r--drivers/video/tegra/host/gr3d/gr3d_t20.c14
-rw-r--r--drivers/video/tegra/host/gr3d/gr3d_t30.c28
-rw-r--r--drivers/video/tegra/host/gr3d/scale3d.c602
-rw-r--r--drivers/video/tegra/host/host1x/host1x.c15
-rw-r--r--drivers/video/tegra/host/host1x/host1x_cdma.c11
-rw-r--r--drivers/video/tegra/host/host1x/host1x_channel.c8
-rw-r--r--drivers/video/tegra/host/host1x/host1x_intr.c13
-rw-r--r--drivers/video/tegra/host/mpe/mpe.c18
-rw-r--r--drivers/video/tegra/host/nvhost_acm.c23
-rw-r--r--drivers/video/tegra/host/nvhost_intr.c12
-rw-r--r--drivers/video/tegra/host/nvhost_intr.h2
-rw-r--r--drivers/video/tegra/host/nvhost_job.c31
-rw-r--r--drivers/video/tegra/host/nvhost_syncpt.c6
-rw-r--r--drivers/video/tegra/host/t30/t30.c2
19 files changed, 583 insertions, 215 deletions
diff --git a/drivers/video/tegra/host/bus.c b/drivers/video/tegra/host/bus.c
index 758a5ca4ad94..f22dac288051 100644
--- a/drivers/video/tegra/host/bus.c
+++ b/drivers/video/tegra/host/bus.c
@@ -96,7 +96,7 @@ static int nvhost_bus_match(struct device *_dev, struct device_driver *drv)
if (ndrv->id_table)
return nvhost_bus_match_id(dev, ndrv->id_table) != NULL;
else /* driver does not support id_table */
- return !strncmp(dev->name, drv->name, strlen(drv->name));
+ return !strcmp(dev->name, drv->name);
}
static int nvhost_drv_probe(struct device *_dev)
diff --git a/drivers/video/tegra/host/bus_client.c b/drivers/video/tegra/host/bus_client.c
index 0137793b39ee..aaa038221971 100644
--- a/drivers/video/tegra/host/bus_client.c
+++ b/drivers/video/tegra/host/bus_client.c
@@ -159,7 +159,8 @@ static int nvhost_channelopen(struct inode *inode, struct file *filp)
}
filp->private_data = priv;
priv->ch = ch;
- nvhost_module_add_client(ch->dev, priv);
+ if(nvhost_module_add_client(ch->dev, priv))
+ goto fail;
if (ch->ctxhandler && ch->ctxhandler->alloc) {
priv->hwctx = ch->ctxhandler->alloc(ch->ctxhandler, ch);
diff --git a/drivers/video/tegra/host/chip_support.h b/drivers/video/tegra/host/chip_support.h
index f5d2811f143f..412ce8b65466 100644
--- a/drivers/video/tegra/host/chip_support.h
+++ b/drivers/video/tegra/host/chip_support.h
@@ -125,6 +125,7 @@ struct nvhost_intr_ops {
void (*set_syncpt_threshold)(
struct nvhost_intr *, u32 id, u32 thresh);
void (*enable_syncpt_intr)(struct nvhost_intr *, u32 id);
+ void (*disable_syncpt_intr)(struct nvhost_intr *, u32 id);
void (*disable_all_syncpt_intrs)(struct nvhost_intr *);
int (*request_host_general_irq)(struct nvhost_intr *);
void (*free_host_general_irq)(struct nvhost_intr *);
diff --git a/drivers/video/tegra/host/gr3d/gr3d.c b/drivers/video/tegra/host/gr3d/gr3d.c
index 715468131d9e..775c77b0e88d 100644
--- a/drivers/video/tegra/host/gr3d/gr3d.c
+++ b/drivers/video/tegra/host/gr3d/gr3d.c
@@ -80,8 +80,10 @@ struct host1x_hwctx *nvhost_3dctx_alloc_common(struct host1x_hwctx_handler *p,
ctx->restore = mem_op().alloc(memmgr, p->restore_size * 4, 32,
map_restore ? mem_mgr_flag_write_combine
: mem_mgr_flag_uncacheable);
- if (IS_ERR_OR_NULL(ctx->restore))
+ if (IS_ERR_OR_NULL(ctx->restore)) {
+ ctx->restore = NULL;
goto fail;
+ }
if (map_restore) {
ctx->restore_virt = mem_op().mmap(ctx->restore);
diff --git a/drivers/video/tegra/host/gr3d/gr3d.h b/drivers/video/tegra/host/gr3d/gr3d.h
index 3855b237b702..61f708cea95c 100644
--- a/drivers/video/tegra/host/gr3d/gr3d.h
+++ b/drivers/video/tegra/host/gr3d/gr3d.h
@@ -29,6 +29,9 @@
#define AR3D_PSEQ_QUAD_ID 0x545
#define AR3D_DW_MEMORY_OUTPUT_ADDRESS 0x904
#define AR3D_DW_MEMORY_OUTPUT_DATA 0x905
+#define AR3D_FDC_CONTROL_0 0xa00
+#define AR3D_FDC_CONTROL_0_RESET_VAL 0xe00
+#define AR3D_FDC_CONTROL_0_INVALIDATE 1
#define AR3D_GSHIM_WRITE_MASK 0xb00
#define AR3D_GSHIM_READ_SELECT 0xb01
#define AR3D_GLOBAL_MEMORY_OUTPUT_READS 0xe40
diff --git a/drivers/video/tegra/host/gr3d/gr3d_t20.c b/drivers/video/tegra/host/gr3d/gr3d_t20.c
index b6e3896fe50c..694b00527790 100644
--- a/drivers/video/tegra/host/gr3d/gr3d_t20.c
+++ b/drivers/video/tegra/host/gr3d/gr3d_t20.c
@@ -144,7 +144,7 @@ static void save_push_v0(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma)
p->save_phys);
}
-static void __init save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr)
+static void save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr)
{
/* 3d: when done, increment syncpt to base+1 */
ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
@@ -162,7 +162,7 @@ static void __init save_begin_v0(struct host1x_hwctx_handler *h, u32 *ptr)
h->syncpt); /* incr 2 */
}
-static void __init save_direct_v0(u32 *ptr, u32 start_reg, u32 count)
+static void save_direct_v0(u32 *ptr, u32 start_reg, u32 count)
{
ptr[0] = nvhost_opcode_nonincr(host1x_uclass_indoff_r(), 1);
ptr[1] = nvhost_class_host_indoff_reg_read(NV_HOST_MODULE_GR3D,
@@ -170,7 +170,7 @@ static void __init save_direct_v0(u32 *ptr, u32 start_reg, u32 count)
ptr[2] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count);
}
-static void __init save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset,
+static void save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset,
u32 data_reg, u32 count)
{
ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID,
@@ -183,7 +183,7 @@ static void __init save_indirect_v0(u32 *ptr, u32 offset_reg, u32 offset,
ptr[4] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count);
}
-static void __init save_end_v0(struct host1x_hwctx_handler *h, u32 *ptr)
+static void save_end_v0(struct host1x_hwctx_handler *h, u32 *ptr)
{
/* Wait for context read service to finish (cpu incr 3) */
ptr[0] = nvhost_opcode_nonincr(host1x_uclass_wait_syncpt_base_r(), 1);
@@ -226,7 +226,7 @@ static u32 *save_regs_v0(u32 *ptr, unsigned int *pending,
/*** save ***/
-static void __init setup_save_regs(struct save_info *info,
+static void setup_save_regs(struct save_info *info,
const struct hwctx_reginfo *regs,
unsigned int nr_regs)
{
@@ -284,7 +284,7 @@ static void __init setup_save_regs(struct save_info *info,
info->restore_count = restore_count;
}
-static void __init setup_save(struct host1x_hwctx_handler *h, u32 *ptr)
+static void setup_save(struct host1x_hwctx_handler *h, u32 *ptr)
{
struct save_info info = {
ptr,
@@ -371,7 +371,7 @@ struct nvhost_hwctx_handler *nvhost_gr3d_t20_ctxhandler_init(
p->save_buf = mem_op().alloc(memmgr, p->save_size * sizeof(u32), 32,
mem_mgr_flag_write_combine);
- if (IS_ERR(p->save_buf)) {
+ if (IS_ERR_OR_NULL(p->save_buf)) {
p->save_buf = NULL;
return NULL;
}
diff --git a/drivers/video/tegra/host/gr3d/gr3d_t30.c b/drivers/video/tegra/host/gr3d/gr3d_t30.c
index c35fea2f3ac2..664708c7fc80 100644
--- a/drivers/video/tegra/host/gr3d/gr3d_t30.c
+++ b/drivers/video/tegra/host/gr3d/gr3d_t30.c
@@ -125,6 +125,16 @@ static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma)
nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0),
NVHOST_OPCODE_NOOP);
+ /* invalidate the FDC to prevent cache-coherency issues across GPUs
+ note that we assume FDC_CONTROL_0 is left in the reset state by all
+ contexts. the invalidate bit will clear itself, so the register
+ should be unchanged after this */
+ nvhost_cdma_push(cdma,
+ nvhost_opcode_imm(AR3D_FDC_CONTROL_0,
+ AR3D_FDC_CONTROL_0_RESET_VAL
+ | AR3D_FDC_CONTROL_0_INVALIDATE),
+ NVHOST_OPCODE_NOOP);
+
/* set register set 0 and 1 register read memory output addresses,
and send their reads to memory */
@@ -132,7 +142,7 @@ static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma)
nvhost_opcode_imm(AR3D_GSHIM_WRITE_MASK, 2),
nvhost_opcode_imm(AR3D_GLOBAL_MEMORY_OUTPUT_READS, 1));
nvhost_cdma_push(cdma,
- nvhost_opcode_nonincr(0x904, 1),
+ nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_ADDRESS, 1),
ctx->restore_phys + restore_set1_offset * 4);
nvhost_cdma_push(cdma,
@@ -150,7 +160,7 @@ static void save_push_v1(struct nvhost_hwctx *nctx, struct nvhost_cdma *cdma)
p->save_phys);
}
-static void __init save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr)
+static void save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr)
{
ptr[0] = nvhost_opcode_nonincr(AR3D_DW_MEMORY_OUTPUT_DATA,
RESTORE_BEGIN_SIZE);
@@ -158,7 +168,7 @@ static void __init save_begin_v1(struct host1x_hwctx_handler *p, u32 *ptr)
ptr += RESTORE_BEGIN_SIZE;
}
-static void __init save_direct_v1(u32 *ptr, u32 start_reg, u32 count)
+static void save_direct_v1(u32 *ptr, u32 start_reg, u32 count)
{
ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID,
AR3D_DW_MEMORY_OUTPUT_DATA, 1);
@@ -172,7 +182,7 @@ static void __init save_direct_v1(u32 *ptr, u32 start_reg, u32 count)
ptr[3] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count);
}
-static void __init save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset,
+static void save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset,
u32 data_reg, u32 count)
{
ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID, 0, 0);
@@ -189,7 +199,7 @@ static void __init save_indirect_v1(u32 *ptr, u32 offset_reg, u32 offset,
ptr[5] = nvhost_opcode_nonincr(host1x_uclass_inddata_r(), count);
}
-static void __init save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr)
+static void save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr)
{
/* write end of restore buffer */
ptr[0] = nvhost_opcode_setclass(NV_GRAPHICS_3D_CLASS_ID,
@@ -224,7 +234,7 @@ static void __init save_end_v1(struct host1x_hwctx_handler *p, u32 *ptr)
-static void __init setup_save_regs(struct save_info *info,
+static void setup_save_regs(struct save_info *info,
const struct hwctx_reginfo *regs,
unsigned int nr_regs)
{
@@ -282,7 +292,7 @@ static void __init setup_save_regs(struct save_info *info,
info->restore_count = restore_count;
}
-static void __init switch_gpu(struct save_info *info,
+static void switch_gpu(struct save_info *info,
unsigned int save_src_set,
u32 save_dest_sets,
u32 restore_dest_sets)
@@ -303,7 +313,7 @@ static void __init switch_gpu(struct save_info *info,
info->restore_count += 1;
}
-static void __init setup_save(struct host1x_hwctx_handler *p, u32 *ptr)
+static void setup_save(struct host1x_hwctx_handler *p, u32 *ptr)
{
struct save_info info = {
ptr,
@@ -399,7 +409,7 @@ struct nvhost_hwctx_handler *nvhost_gr3d_t30_ctxhandler_init(
p->save_buf = mem_op().alloc(memmgr, p->save_size * 4, 32,
mem_mgr_flag_write_combine);
- if (IS_ERR(p->save_buf)) {
+ if (IS_ERR_OR_NULL(p->save_buf)) {
p->save_buf = NULL;
return NULL;
}
diff --git a/drivers/video/tegra/host/gr3d/scale3d.c b/drivers/video/tegra/host/gr3d/scale3d.c
index 5922b55a836a..49147975a9e4 100644
--- a/drivers/video/tegra/host/gr3d/scale3d.c
+++ b/drivers/video/tegra/host/gr3d/scale3d.c
@@ -1,9 +1,9 @@
/*
- * drivers/video/tegra/host/t20/scale3d.c
+ * drivers/video/tegra/host/gr3d/scale3d.c
*
* Tegra Graphics Host 3D clock scaling
*
- * Copyright (c) 2010-2012, NVIDIA Corporation.
+ * Copyright (c) 2010-2012, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -23,12 +23,11 @@
*
* module3d_notify_busy() is called upon submit, module3d_notify_idle() is
* called when all outstanding submits are completed. Idle times are measured
- * over a fixed time period (scale3d.p_period). If the 3d module idle time
- * percentage goes over the limit (set in scale3d.p_idle_max), 3d clocks are
- * scaled down. If the percentage goes under the minimum limit (set in
- * scale3d.p_idle_min), 3d clocks are scaled up. An additional test is made
- * over the time frame given in scale3d.p_fast_response for clocking up
- * quickly in response to load peaks.
+ * over a fixed time period (scale3d.p_estimation_window). If the 3d module
+ * idle time percentage goes over the limit (set in scale3d.p_idle_max), 3d
+ * clocks are scaled down. If the percentage goes under the minimum limit (set
+ * in scale3d.p_idle_min), 3d clocks are scaled up. An additional test is made
+ * for clocking up quickly in response to load peaks.
*
* 3d.emc clock is scaled proportionately to 3d clock, with a quadratic-
* bezier-like factor added to pull 3d.emc rate a bit lower.
@@ -37,10 +36,31 @@
#include <linux/debugfs.h>
#include <linux/types.h>
#include <linux/clk.h>
+#include <linux/slab.h>
#include <mach/clk.h>
#include <mach/hardware.h>
#include "scale3d.h"
#include "dev.h"
+#include <media/tegra_camera.h>
+
+#define GR3D_PRINT_STATS BIT(1)
+#define GR3D_PRINT_BUSY BIT(2)
+#define GR3D_PRINT_IDLE BIT(3)
+#define GR3D_PRINT_HINT BIT(4)
+#define GR3D_PRINT_TARGET BIT(5)
+
+/* time frame for load and hint tracking - when events come in at a larger
+ * interval, this probably indicates the current estimates are stale
+ */
+#define GR3D_TIMEFRAME 1000000 /* 1 sec */
+
+/* the number of frames to use in the running average of load estimates and
+ * throughput hints. Choosing 6 frames targets a window of about 100 msec.
+ * Large flucutuations in frame times require a window that's large enough to
+ * prevent spiky scaling behavior, which in turn exacerbates frame rate
+ * instability.
+ */
+#define GR3D_FRAME_SPAN 6
static int scale3d_is_enabled(void);
static void scale3d_enable(int enable);
@@ -48,54 +68,73 @@ static void scale3d_enable(int enable);
#define POW2(x) ((x) * (x))
/*
+ * 3D clock scaling should be treated differently when camera is on in AP37.
+ * 3D in AP37 requires 1.3V and combining it with MPE reaches to EDP limit.
+ * 3D clock really needs to be set to lower frequency which requires 1.0V.
+ * The same thing applies to 3D EMC clock.
+ */
+#define CAMERA_3D_CLK 300000000
+#define CAMERA_3D_EMC_CLK 437000000
+
+/*
* debugfs parameters to control 3d clock scaling test
*
- * period - time period for clock rate evaluation
- * fast_response - time period for evaluation of 'busy' spikes
- * idle_min - if less than [idle_min] percent idle over [fast_response]
- * microseconds, clock up.
- * idle_max - if over [idle_max] percent idle over [period] microseconds,
- * clock down.
+ * estimation_window - time period for clock rate evaluation
+ * idle_min - if less than [idle_min / 10] percent idle over
+ * [estimation_window] microseconds, clock up.
+ * idle_max - if over [idle_max] percent idle over [estimation_window]
+ * microseconds, clock down.
* max_scale - limits rate changes to no less than (100 - max_scale)% or
* (100 + 2 * max_scale)% of current clock rate
- * verbosity - set above 5 for debug printouts
+ * verbosity - bit flag to control debug printouts:
+ * 1 - stats
+ * 2 - busy
+ * 3 - idle
+ * 4 - hints
+ * 5 - target frequencies
*/
struct scale3d_info_rec {
struct mutex lock; /* lock for timestamps etc */
int enable;
int init;
- ktime_t idle_frame;
- ktime_t fast_frame;
- ktime_t last_idle;
- ktime_t last_short_term_idle;
+ ktime_t last_scale;
int is_idle;
- ktime_t last_tweak;
- ktime_t last_down;
+ ktime_t last_adjust;
int fast_up_count;
int slow_down_count;
int is_scaled;
- int fast_responses;
- unsigned long idle_total;
- unsigned long idle_short_term_total;
- unsigned long max_rate_3d;
long emc_slope;
long emc_offset;
long emc_dip_slope;
long emc_dip_offset;
long emc_xmid;
+ unsigned long max_rate_3d;
unsigned long min_rate_3d;
+ ktime_t last_throughput_hint;
+
struct work_struct work;
struct delayed_work idle_timer;
+
+ ktime_t last_estimation_window;
+ long last_total_idle;
+ long total_idle;
+ ktime_t estimation_window;
+ ktime_t last_notification;
+ long idle_estimate;
+
unsigned int scale;
- unsigned int p_period;
- unsigned int period;
+ unsigned int p_busy_cutoff;
+ unsigned int p_estimation_window;
+ unsigned int p_use_throughput_hint;
+ unsigned int p_throughput_lo_limit;
+ unsigned int p_throughput_lower_limit;
+ unsigned int p_throughput_hi_limit;
+ unsigned int p_scale_step;
unsigned int p_idle_min;
unsigned int idle_min;
unsigned int p_idle_max;
unsigned int idle_max;
- unsigned int p_fast_response;
- unsigned int fast_response;
unsigned int p_adjust;
unsigned int p_scale_emc;
unsigned int p_emc_dip;
@@ -103,13 +142,15 @@ struct scale3d_info_rec {
struct clk *clk_3d;
struct clk *clk_3d2;
struct clk *clk_3d_emc;
+ int *freqlist;
+ int freq_count;
};
static struct scale3d_info_rec scale3d;
-static void scale3d_clocks(unsigned long percent)
+static void scale_to_freq(unsigned long hz)
{
- unsigned long hz, curr;
+ unsigned long curr;
if (!tegra_is_clk_enabled(scale3d.clk_3d))
return;
@@ -119,7 +160,8 @@ static void scale3d_clocks(unsigned long percent)
return;
curr = clk_get_rate(scale3d.clk_3d);
- hz = percent * (curr / 100);
+ if (hz == curr)
+ return;
if (!(hz >= scale3d.max_rate_3d && curr == scale3d.max_rate_3d)) {
if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
@@ -139,6 +181,16 @@ static void scale3d_clocks(unsigned long percent)
}
}
+static void scale3d_clocks(unsigned long percent)
+{
+ unsigned long hz, curr;
+
+ curr = clk_get_rate(scale3d.clk_3d);
+ hz = percent * (curr / 100);
+
+ scale_to_freq(hz);
+}
+
static void scale3d_clocks_handler(struct work_struct *work)
{
unsigned int scale;
@@ -164,12 +216,26 @@ void nvhost_scale3d_suspend(struct nvhost_device *dev)
static void reset_3d_clocks(void)
{
if (clk_get_rate(scale3d.clk_3d) != scale3d.max_rate_3d) {
- clk_set_rate(scale3d.clk_3d, scale3d.max_rate_3d);
- if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
- clk_set_rate(scale3d.clk_3d2, scale3d.max_rate_3d);
- if (scale3d.p_scale_emc)
- clk_set_rate(scale3d.clk_3d_emc,
- clk_round_rate(scale3d.clk_3d_emc, UINT_MAX));
+ if (is_tegra_camera_on())
+ clk_set_rate(scale3d.clk_3d, CAMERA_3D_CLK);
+ else
+ clk_set_rate(scale3d.clk_3d, scale3d.max_rate_3d);
+ if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3) {
+ if (is_tegra_camera_on())
+ clk_set_rate(scale3d.clk_3d2, CAMERA_3D_CLK);
+ else
+ clk_set_rate(scale3d.clk_3d2,
+ scale3d.max_rate_3d);
+ }
+ if (scale3d.p_scale_emc) {
+ if (is_tegra_camera_on())
+ clk_set_rate(scale3d.clk_3d_emc,
+ CAMERA_3D_EMC_CLK);
+ else
+ clk_set_rate(scale3d.clk_3d_emc,
+ clk_round_rate(scale3d.clk_3d_emc,
+ UINT_MAX));
+ }
}
}
@@ -207,15 +273,6 @@ static void scale3d_enable(int enable)
reset_3d_clocks();
}
-static void reset_scaling_counters(ktime_t time)
-{
- scale3d.idle_total = 0;
- scale3d.idle_short_term_total = 0;
- scale3d.last_idle = time;
- scale3d.last_short_term_idle = time;
- scale3d.idle_frame = time;
-}
-
/* scaling_adjust - use scale up / scale down hint counts to adjust scaling
* parameters.
*
@@ -228,8 +285,6 @@ static void reset_scaling_counters(ktime_t time)
*
* the parameters adjusted are
*
- * * fast_response time
- * * period - time for scaling down estimate
* * idle_min percentage
* * idle_max percentage
*/
@@ -242,13 +297,11 @@ static void reset_scaling_counters(ktime_t time)
static void scaling_adjust(ktime_t time)
{
long hint_ratio;
- long fast_response_adjustment;
- long period_adjustment;
int idle_min_adjustment;
int idle_max_adjustment;
unsigned long dt;
- dt = (unsigned long) ktime_us_delta(time, scale3d.last_tweak);
+ dt = (unsigned long) ktime_us_delta(time, scale3d.last_adjust);
if (dt < SCALING_ADJUST_PERIOD)
return;
@@ -256,13 +309,9 @@ static void scaling_adjust(ktime_t time)
(scale3d.slow_down_count + 1);
if (hint_ratio > HINT_RATIO_MAX) {
- fast_response_adjustment = -((int) scale3d.p_fast_response) / 4;
- period_adjustment = scale3d.p_period / 2;
idle_min_adjustment = scale3d.p_idle_min;
idle_max_adjustment = scale3d.p_idle_max;
} else if (hint_ratio < HINT_RATIO_MIN) {
- fast_response_adjustment = scale3d.p_fast_response / 2;
- period_adjustment = -((int) scale3d.p_period) / 4;
idle_min_adjustment = -((int) scale3d.p_idle_min) / 2;
idle_max_adjustment = -((int) scale3d.p_idle_max) / 2;
} else {
@@ -277,33 +326,23 @@ static void scaling_adjust(ktime_t time)
diff *= 2;
}
- fast_response_adjustment = diff *
- (scale3d.p_fast_response / (HINT_RATIO_DIFF * 2));
- period_adjustment =
- diff * (scale3d.p_period / HINT_RATIO_DIFF);
idle_min_adjustment =
(factor * (int) scale3d.p_idle_min) / HINT_RATIO_DIFF;
idle_max_adjustment =
(factor * (int) scale3d.p_idle_max) / HINT_RATIO_DIFF;
}
- scale3d.fast_response =
- scale3d.p_fast_response + fast_response_adjustment;
- scale3d.period = scale3d.p_period + period_adjustment;
- scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment;
+ scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment;
scale3d.idle_max = scale3d.p_idle_max + idle_max_adjustment;
- if (scale3d.p_verbosity >= 10)
- pr_info("scale3d stats: + %d - %d (/ %d) f %u p %u min %u max %u\n",
+ if (scale3d.p_verbosity & GR3D_PRINT_STATS)
+ pr_info("scale3d stats: + %d - %d min %u max %u\n",
scale3d.fast_up_count, scale3d.slow_down_count,
- scale3d.fast_responses, scale3d.fast_response,
- scale3d.period, scale3d.idle_min, scale3d.idle_max);
+ scale3d.idle_min, scale3d.idle_max);
scale3d.fast_up_count = 0;
scale3d.slow_down_count = 0;
- scale3d.fast_responses = 0;
- scale3d.last_down = time;
- scale3d.last_tweak = time;
+ scale3d.last_adjust = time;
}
#undef SCALING_ADJUST_PERIOD
@@ -316,61 +355,101 @@ static void scaling_state_check(ktime_t time)
{
unsigned long dt;
- /* adjustment: set scale parameters (fast_response, period) +/- 25%
+ /* adjustment: set scale parameters (idle_min, idle_max) +/- 25%
* based on ratio of scale up to scale down hints
*/
if (scale3d.p_adjust)
scaling_adjust(time);
else {
- scale3d.fast_response = scale3d.p_fast_response;
- scale3d.period = scale3d.p_period;
scale3d.idle_min = scale3d.p_idle_min;
scale3d.idle_max = scale3d.p_idle_max;
}
- /* check for load peaks */
- dt = (unsigned long) ktime_us_delta(time, scale3d.fast_frame);
- if (dt > scale3d.fast_response) {
- unsigned long idleness =
- (scale3d.idle_short_term_total * 100) / dt;
- scale3d.fast_responses++;
- scale3d.fast_frame = time;
- /* if too busy, scale up */
- if (idleness < scale3d.idle_min) {
- scale3d.is_scaled = 0;
- scale3d.fast_up_count++;
- if (scale3d.p_verbosity >= 5)
- pr_info("scale3d: %ld%% busy\n",
- 100 - idleness);
-
- reset_3d_clocks();
- reset_scaling_counters(time);
- return;
- }
- scale3d.idle_short_term_total = 0;
- scale3d.last_short_term_idle = time;
+ dt = (unsigned long) ktime_us_delta(time, scale3d.last_scale);
+ if (dt < scale3d.p_estimation_window)
+ return;
+
+ scale3d.last_scale = time;
+
+ /* if too busy, scale up */
+ if (scale3d.idle_estimate < scale3d.idle_min) {
+ scale3d.is_scaled = 0;
+ scale3d.fast_up_count++;
+ if (scale3d.p_verbosity & GR3D_PRINT_BUSY)
+ pr_info("scale3d: %ld/1000 busy\n",
+ 1000 - scale3d.idle_estimate);
+
+ reset_3d_clocks();
+ return;
}
- dt = (unsigned long) ktime_us_delta(time, scale3d.idle_frame);
- if (dt > scale3d.period) {
- unsigned long idleness = (scale3d.idle_total * 100) / dt;
+ if (scale3d.p_verbosity & GR3D_PRINT_IDLE)
+ pr_info("scale3d: idle %lu/1000\n",
+ scale3d.idle_estimate);
- if (scale3d.p_verbosity >= 5)
- pr_info("scale3d: idle %lu, ~%lu%%\n",
- scale3d.idle_total, idleness);
+ if (scale3d.idle_estimate > scale3d.idle_max) {
+ if (!scale3d.is_scaled)
+ scale3d.is_scaled = 1;
- if (idleness > scale3d.idle_max) {
- if (!scale3d.is_scaled) {
- scale3d.is_scaled = 1;
- scale3d.last_down = time;
- }
- scale3d.slow_down_count++;
- /* if idle time is high, clock down */
- scale3d.scale = 100 - (idleness - scale3d.idle_min);
- schedule_work(&scale3d.work);
- }
+ scale3d.slow_down_count++;
+ /* if idle time is high, clock down */
+ scale3d.scale =
+ 100 - (scale3d.idle_estimate - scale3d.idle_min) / 10;
+ schedule_work(&scale3d.work);
+ }
+}
+
+/* the idle estimate is done by keeping 2 time stamps, initially set to the
+ * same time. Once the estimation_window time has been exceeded, one time
+ * stamp is moved up to the current time. The idle estimate is calculated
+ * based on the idle time percentage from the earlier estimate. The next time
+ * an estimation_window time is exceeded, the previous idle time and estimates
+ * are moved up - this is intended to prevent abrupt changes to the idle
+ * estimate.
+ */
+static void update_load_estimate(int idle)
+{
+ unsigned long window;
+ unsigned long t;
+
+ ktime_t now = ktime_get();
+ t = ktime_us_delta(now, scale3d.last_notification);
- reset_scaling_counters(time);
+ /* if the last event was over GR3D_TIMEFRAME usec ago (1 sec), the
+ * current load tracking data is probably stale
+ */
+ if (t > GR3D_TIMEFRAME) {
+ scale3d.is_idle = idle;
+ scale3d.last_notification = now;
+ scale3d.estimation_window = now;
+ scale3d.last_estimation_window = now;
+ scale3d.total_idle = 0;
+ scale3d.last_total_idle = 0;
+ scale3d.idle_estimate = idle ? 1000 : 0;
+ return;
+ }
+
+ if (scale3d.is_idle) {
+ scale3d.total_idle += t;
+ scale3d.last_total_idle += t;
+ }
+
+ scale3d.is_idle = idle;
+ scale3d.last_notification = now;
+
+ window = ktime_us_delta(now, scale3d.last_estimation_window);
+ /* prevent division by 0 if events come in less than 1 usec apart */
+ if (window > 0)
+ scale3d.idle_estimate =
+ (1000 * scale3d.last_total_idle) / window;
+
+ /* move up to the last estimation window */
+ if (ktime_us_delta(now, scale3d.estimation_window) >
+ scale3d.p_estimation_window) {
+ scale3d.last_estimation_window = scale3d.estimation_window;
+ scale3d.last_total_idle = scale3d.total_idle;
+ scale3d.total_idle = 0;
+ scale3d.estimation_window = now;
}
}
@@ -378,65 +457,226 @@ void nvhost_scale3d_notify_idle(struct nvhost_device *dev)
{
ktime_t t;
unsigned long dt;
+ int delay;
if (!scale3d.enable)
return;
- mutex_lock(&scale3d.lock);
+ update_load_estimate(1);
t = ktime_get();
- if (scale3d.is_idle) {
- dt = ktime_us_delta(t, scale3d.last_idle);
- scale3d.idle_total += dt;
- dt = ktime_us_delta(t, scale3d.last_short_term_idle);
- scale3d.idle_short_term_total += dt;
- } else
- scale3d.is_idle = 1;
+ /* if throughput hint enabled, and last hint is recent enough, return */
+ if (scale3d.p_use_throughput_hint) {
+ dt = ktime_us_delta(t, scale3d.last_throughput_hint);
+ if (dt < GR3D_TIMEFRAME)
+ return;
+ }
- scale3d.last_idle = t;
- scale3d.last_short_term_idle = t;
+ mutex_lock(&scale3d.lock);
- scaling_state_check(scale3d.last_idle);
+ scaling_state_check(t);
- /* delay idle_max % of 2 * fast_response time (given in microseconds) */
- schedule_delayed_work(&scale3d.idle_timer,
- msecs_to_jiffies((scale3d.idle_max * scale3d.fast_response)
- / 50000));
+ /* delay idle_max % of 2 * estimation_window (given in microseconds) */
+ delay = (scale3d.idle_max * scale3d.p_estimation_window) / 500000;
+ schedule_delayed_work(&scale3d.idle_timer, msecs_to_jiffies(delay));
mutex_unlock(&scale3d.lock);
}
void nvhost_scale3d_notify_busy(struct nvhost_device *dev)
{
- unsigned long idle;
- unsigned long short_term_idle;
ktime_t t;
if (!scale3d.enable)
return;
- mutex_lock(&scale3d.lock);
-
- cancel_delayed_work(&scale3d.idle_timer);
+ update_load_estimate(0);
t = ktime_get();
- if (scale3d.is_idle) {
- idle = (unsigned long)
- ktime_us_delta(t, scale3d.last_idle);
- scale3d.idle_total += idle;
- short_term_idle =
- ktime_us_delta(t, scale3d.last_short_term_idle);
- scale3d.idle_short_term_total += short_term_idle;
- scale3d.is_idle = 0;
+ /* if throughput hint enabled, and last hint is recent enough, return */
+ if (scale3d.p_use_throughput_hint) {
+ unsigned long dt;
+ dt = ktime_us_delta(t, scale3d.last_throughput_hint);
+ if (dt < GR3D_TIMEFRAME)
+ return;
}
+ mutex_lock(&scale3d.lock);
+
+ cancel_delayed_work(&scale3d.idle_timer);
scaling_state_check(t);
mutex_unlock(&scale3d.lock);
}
+struct score {
+ int size; /* number of elements */
+ int pos; /* position in ring buffer */
+ int count; /* actual item count */
+ unsigned int sum; /* running sum */
+ unsigned int prev; /* previous score after 'reset' operation */
+ unsigned int list[]; /* ring buffer */
+};
+
+static struct score *score_init(int capacity)
+{
+ struct score *s;
+
+ s = kzalloc(sizeof(struct score) + capacity * sizeof(int), GFP_KERNEL);
+ if (s == NULL)
+ return NULL;
+
+ s->size = capacity;
+
+ return s;
+}
+
+static void score_delete(struct score *s)
+{
+ kfree(s);
+}
+
+#define score_get_average(s) ((s)->count ? (s)->sum / (s)->count : 0)
+
+static void score_add(struct score *s, unsigned int reading)
+{
+ if (s->count < s->size) {
+ s->sum += reading;
+ s->count++;
+ } else
+ s->sum = s->sum - s->list[s->pos] + reading;
+
+ s->list[s->pos] = reading;
+ s->pos = (s->pos + 1) % s->size;
+}
+
+
+static unsigned int score_reset(struct score *s)
+{
+ s->prev = s->sum;
+
+ s->count = 0;
+ s->pos = 0;
+ s->sum = 0;
+
+ return s->prev;
+}
+
+int freqlist_up(long target, int steps)
+{
+ int i, pos;
+
+ for (i = 0; i < scale3d.freq_count; i++)
+ if (scale3d.freqlist[i] >= target)
+ break;
+
+ pos = min(scale3d.freq_count - 1, i + steps);
+ return scale3d.freqlist[pos];
+}
+
+int freqlist_down(long target, int steps)
+{
+ int i, pos;
+
+ for (i = scale3d.freq_count - 1; i >= 0; i--)
+ if (scale3d.freqlist[i] <= target)
+ break;
+
+ pos = max(0, i - steps);
+ return scale3d.freqlist[pos];
+}
+
+static struct score *busy_history;
+static struct score *hint_history;
+
+/* When a throughput hint is given, perform scaling based on the hint and on
+ * the current idle estimation. This is done as follows:
+ *
+ * 1. On moderate loads force min frequency if the throughput hint is not too
+ * low.
+ * 2. Otherwise, calculate target-rate = max-rate * load-percentage
+ * 3. Unless the current or average throughput hint is below the minimum
+ * limit, in which case, choose a higher rate
+ * 4. Or the average throughput hint is above the maximum limit, in which case,
+ * choose a lower rate.
+ */
+void nvhost_scale3d_set_throughput_hint(int hint)
+{
+ ktime_t now;
+ long busy;
+ long curr;
+ long target;
+ long dt;
+ int avg_busy, avg_hint;
+
+ if (!scale3d.enable)
+ return;
+
+ if (!scale3d.p_use_throughput_hint)
+ return;
+
+ if (scale3d.p_verbosity & GR3D_PRINT_HINT)
+ pr_info("3fds: idle %ld, hint %d\n",
+ scale3d.idle_estimate, hint);
+
+ now = ktime_get();
+ dt = ktime_us_delta(now, scale3d.last_throughput_hint);
+ if (dt > GR3D_TIMEFRAME) {
+ score_reset(busy_history);
+ score_reset(hint_history);
+ }
+
+ scale3d.last_throughput_hint = now;
+
+ busy = 1000 - scale3d.idle_estimate;
+ curr = clk_get_rate(scale3d.clk_3d);
+ target = scale3d.min_rate_3d;
+
+ score_add(busy_history, busy);
+ score_add(hint_history, hint);
+
+ avg_busy = score_get_average(busy_history);
+ avg_hint = score_get_average(hint_history);
+
+ if (busy > 0)
+ target = (curr / 1000) * busy;
+
+ /* In practice, running the gpu at min frequency is typically
+ * sufficient to keep up performance at loads up to 70% on cases,
+ * but the average hint value is tested to keep performance up if
+ * needed.
+ */
+ if (avg_busy <= scale3d.p_busy_cutoff &&
+ avg_hint >= scale3d.p_throughput_lower_limit)
+ target = scale3d.min_rate_3d;
+ else {
+ target = (scale3d.max_rate_3d / 1000) * avg_busy;
+
+ /* Scale up if either the current hint or the running average
+ * are below the target to prevent performance drop.
+ */
+ if (hint <= scale3d.p_throughput_lo_limit ||
+ avg_hint <= scale3d.p_throughput_lo_limit) {
+ if (target < curr)
+ target = curr;
+ target = freqlist_up(target, scale3d.p_scale_step);
+ } else if (avg_hint >= scale3d.p_throughput_hi_limit) {
+ if (target > curr)
+ target = curr;
+ target = freqlist_down(target, scale3d.p_scale_step);
+ }
+ }
+
+ scale_to_freq(target);
+
+ if (scale3d.p_verbosity & GR3D_PRINT_TARGET)
+ pr_info("3dfs: busy %ld <%d>, curr %ld, t %ld, hint %d <%d>\n",
+ busy, avg_busy, curr / 1000000, target, hint, avg_hint);
+}
+EXPORT_SYMBOL(nvhost_scale3d_set_throughput_hint);
+
static void scale3d_idle_handler(struct work_struct *work)
{
int notify_idle = 0;
@@ -458,19 +698,6 @@ static void scale3d_idle_handler(struct work_struct *work)
nvhost_scale3d_notify_idle(NULL);
}
-void nvhost_scale3d_reset()
-{
- ktime_t t;
-
- if (!scale3d.enable)
- return;
-
- t = ktime_get();
- mutex_lock(&scale3d.lock);
- reset_scaling_counters(t);
- mutex_unlock(&scale3d.lock);
-}
-
/*
* debugfs parameters to control 3d clock scaling
*/
@@ -495,13 +722,17 @@ void nvhost_scale3d_debug_init(struct dentry *de)
} \
} while (0)
- CREATE_SCALE3D_FILE(fast_response);
+ CREATE_SCALE3D_FILE(estimation_window);
CREATE_SCALE3D_FILE(idle_min);
CREATE_SCALE3D_FILE(idle_max);
- CREATE_SCALE3D_FILE(period);
CREATE_SCALE3D_FILE(adjust);
CREATE_SCALE3D_FILE(scale_emc);
CREATE_SCALE3D_FILE(emc_dip);
+ CREATE_SCALE3D_FILE(use_throughput_hint);
+ CREATE_SCALE3D_FILE(throughput_hi_limit);
+ CREATE_SCALE3D_FILE(throughput_lo_limit);
+ CREATE_SCALE3D_FILE(throughput_lower_limit);
+ CREATE_SCALE3D_FILE(scale_step);
CREATE_SCALE3D_FILE(verbosity);
#undef CREATE_SCALE3D_FILE
}
@@ -532,12 +763,17 @@ static ssize_t enable_3d_scaling_store(struct device *dev,
static DEVICE_ATTR(enable_3d_scaling, S_IRUGO | S_IWUSR,
enable_3d_scaling_show, enable_3d_scaling_store);
+#define MAX_FREQ_COUNT 0x40 /* 64 frequencies should be enough for anyone */
+
void nvhost_scale3d_init(struct nvhost_device *d)
{
if (!scale3d.init) {
int error;
unsigned long max_emc, min_emc;
long correction;
+ long rate;
+ int freqs[MAX_FREQ_COUNT];
+
mutex_init(&scale3d.lock);
INIT_WORK(&scale3d.work, scale3d_clocks_handler);
@@ -632,30 +868,74 @@ void nvhost_scale3d_init(struct nvhost_device *d)
POW2(scale3d.max_rate_3d - scale3d.emc_xmid);
scale3d.emc_dip_offset -= correction;
+ scale3d.is_idle = 1;
+
/* set scaling parameter defaults */
scale3d.enable = 1;
- scale3d.period = scale3d.p_period = 100000;
- scale3d.idle_min = scale3d.p_idle_min = 10;
- scale3d.idle_max = scale3d.p_idle_max = 15;
- scale3d.fast_response = scale3d.p_fast_response = 7000;
+ scale3d.idle_min = scale3d.p_idle_min = 100;
+ scale3d.idle_max = scale3d.p_idle_max = 150;
scale3d.p_scale_emc = 1;
scale3d.p_emc_dip = 1;
scale3d.p_verbosity = 0;
scale3d.p_adjust = 1;
+ scale3d.p_use_throughput_hint = 1;
+ scale3d.p_throughput_lower_limit = 940;
+ scale3d.p_throughput_lo_limit = 990;
+ scale3d.p_throughput_hi_limit = 1010;
+ scale3d.p_scale_step = 1;
+ scale3d.p_estimation_window = 8000;
+ scale3d.p_busy_cutoff = 750;
error = device_create_file(&d->dev,
&dev_attr_enable_3d_scaling);
if (error)
dev_err(&d->dev, "failed to create sysfs attributes");
+ rate = 0;
+ scale3d.freq_count = 0;
+ while (rate <= scale3d.max_rate_3d) {
+ long rounded_rate;
+ if (unlikely(scale3d.freq_count == MAX_FREQ_COUNT)) {
+ pr_err("%s: too many frequencies\n", __func__);
+ break;
+ }
+ rounded_rate =
+ clk_round_rate(scale3d.clk_3d, rate);
+ freqs[scale3d.freq_count++] = rounded_rate;
+ rate = rounded_rate + 2000;
+ }
+ scale3d.freqlist =
+ kmalloc(scale3d.freq_count * sizeof(int), GFP_KERNEL);
+ if (scale3d.freqlist == NULL)
+ pr_err("%s: can\'t allocate freq table\n", __func__);
+
+ memcpy(scale3d.freqlist, freqs,
+ scale3d.freq_count * sizeof(int));
+
+ busy_history = score_init(GR3D_FRAME_SPAN);
+ if (busy_history == NULL)
+ pr_err("%s: can\'t init load tracking array\n",
+ __func__);
+
+ hint_history = score_init(GR3D_FRAME_SPAN);
+ if (hint_history == NULL)
+ pr_err("%s: can\'t init throughput tracking array\n",
+ __func__);
+
scale3d.init = 1;
}
-
- nvhost_scale3d_reset();
}
void nvhost_scale3d_deinit(struct nvhost_device *dev)
{
device_remove_file(&dev->dev, &dev_attr_enable_3d_scaling);
scale3d.init = 0;
+ if (scale3d.freqlist != NULL) {
+ kfree(scale3d.freqlist);
+ scale3d.freq_count = 0;
+ scale3d.freqlist = NULL;
+ }
+
+ score_delete(busy_history);
+ score_delete(hint_history);
}
diff --git a/drivers/video/tegra/host/host1x/host1x.c b/drivers/video/tegra/host/host1x/host1x.c
index 33ebc1ff5d22..31899c78065b 100644
--- a/drivers/video/tegra/host/host1x/host1x.c
+++ b/drivers/video/tegra/host/host1x/host1x.c
@@ -308,6 +308,19 @@ static int power_off_host(struct nvhost_device *dev)
return 0;
}
+static void clock_on_host(struct nvhost_device *dev)
+{
+ struct nvhost_master *host = nvhost_get_drvdata(dev);
+ nvhost_intr_start(&host->intr, clk_get_rate(dev->clk[0]));
+}
+
+static int clock_off_host(struct nvhost_device *dev)
+{
+ struct nvhost_master *host = nvhost_get_drvdata(dev);
+ nvhost_intr_stop(&host->intr);
+ return 0;
+}
+
static int __devinit nvhost_user_init(struct nvhost_master *host)
{
int err, devno;
@@ -516,6 +529,8 @@ static struct nvhost_driver nvhost_driver = {
},
.finalize_poweron = power_on_host,
.prepare_poweroff = power_off_host,
+ .finalize_clockon = clock_on_host,
+ .prepare_clockoff = clock_off_host,
};
static int __init nvhost_mod_init(void)
diff --git a/drivers/video/tegra/host/host1x/host1x_cdma.c b/drivers/video/tegra/host/host1x/host1x_cdma.c
index 2e7ff5783a37..5a29ff652efe 100644
--- a/drivers/video/tegra/host/host1x/host1x_cdma.c
+++ b/drivers/video/tegra/host/host1x/host1x_cdma.c
@@ -233,12 +233,15 @@ static void cdma_timeout_cpu_incr(struct nvhost_cdma *cdma, u32 getptr,
/* after CPU incr, ensure shadow is up to date */
nvhost_syncpt_update_min(&dev->syncpt, cdma->timeout.syncpt_id);
- /* update WAITBASE_3D by same number of incrs */
- if (waitbases) {
+ /* Synchronize wait bases. 2D wait bases are synchronized with
+ * syncpoint 19. Hence wait bases are not updated when syncptid=18. */
+
+ if (cdma->timeout.syncpt_id != NVSYNCPT_2D_0 && waitbases) {
void __iomem *p;
p = dev->sync_aperture + host1x_sync_syncpt_base_0_r() +
- (ffs(waitbases) * sizeof(u32));
+ (__ffs(waitbases) * sizeof(u32));
writel(syncval, p);
+ dev->syncpt.base_val[__ffs(waitbases)] = syncval;
}
/* NOP all the PB slots */
@@ -486,7 +489,7 @@ static void cdma_timeout_handler(struct work_struct *work)
/* stop HW, resetting channel/module */
cdma_op().timeout_teardown_begin(cdma);
- nvhost_cdma_update_sync_queue(cdma, sp, dev->dev);
+ nvhost_cdma_update_sync_queue(cdma, sp, ch->dev);
mutex_unlock(&cdma->lock);
}
diff --git a/drivers/video/tegra/host/host1x/host1x_channel.c b/drivers/video/tegra/host/host1x/host1x_channel.c
index 9e9fc25dc966..0274413ff698 100644
--- a/drivers/video/tegra/host/host1x/host1x_channel.c
+++ b/drivers/video/tegra/host/host1x/host1x_channel.c
@@ -365,7 +365,7 @@ static int host1x_channel_read_3d_reg(
if (hwctx_to_save) {
syncpt_incrs += hwctx_to_save->save_incrs;
hwctx_to_save->hwctx.valid = true;
- channel->ctxhandler->get(&hwctx_to_save->hwctx);
+ nvhost_job_get_hwctx(job, &hwctx_to_save->hwctx);
}
channel->cur_ctx = hwctx;
if (channel->cur_ctx && channel->cur_ctx->valid) {
@@ -470,7 +470,8 @@ static int host1x_channel_read_3d_reg(
wait_event(wq,
nvhost_syncpt_is_expired(&nvhost_get_host(channel->dev)->syncpt,
p->syncpt, syncval - 2));
- nvhost_intr_put_ref(&nvhost_get_host(channel->dev)->intr, ref);
+ nvhost_intr_put_ref(&nvhost_get_host(channel->dev)->intr, p->syncpt,
+ ref);
/* Read the register value from FIFO */
err = host1x_drain_read_fifo(channel, value, 1, &pending);
@@ -580,7 +581,6 @@ static int host1x_save_context(struct nvhost_channel *ch)
}
hwctx_to_save->valid = true;
- ch->ctxhandler->get(hwctx_to_save);
ch->cur_ctx = NULL;
syncpt_id = to_host1x_hwctx_handler(hwctx_to_save->h)->syncpt;
@@ -623,7 +623,7 @@ static int host1x_save_context(struct nvhost_channel *ch)
nvhost_syncpt_is_expired(&nvhost_get_host(ch->dev)->syncpt,
syncpt_id, syncpt_val));
- nvhost_intr_put_ref(&nvhost_get_host(ch->dev)->intr, ref);
+ nvhost_intr_put_ref(&nvhost_get_host(ch->dev)->intr, syncpt_id, ref);
nvhost_cdma_update(&ch->cdma);
diff --git a/drivers/video/tegra/host/host1x/host1x_intr.c b/drivers/video/tegra/host/host1x/host1x_intr.c
index 62fd07cbb9ba..facb818a0c24 100644
--- a/drivers/video/tegra/host/host1x/host1x_intr.c
+++ b/drivers/video/tegra/host/host1x/host1x_intr.c
@@ -131,6 +131,16 @@ static void t20_intr_enable_syncpt_intr(struct nvhost_intr *intr, u32 id)
BIT_WORD(id) * REGISTER_STRIDE);
}
+static void t20_intr_disable_syncpt_intr(struct nvhost_intr *intr, u32 id)
+{
+ struct nvhost_master *dev = intr_to_dev(intr);
+ void __iomem *sync_regs = dev->sync_aperture;
+
+ writel(BIT_MASK(id), sync_regs +
+ host1x_sync_syncpt_thresh_int_disable_r() +
+ BIT_WORD(id) * REGISTER_STRIDE);
+}
+
static void t20_intr_disable_all_syncpt_intrs(struct nvhost_intr *intr)
{
struct nvhost_master *dev = intr_to_dev(intr);
@@ -140,7 +150,7 @@ static void t20_intr_disable_all_syncpt_intrs(struct nvhost_intr *intr)
for (reg = 0; reg <= BIT_WORD(dev->info.nb_pts) * REGISTER_STRIDE;
reg += REGISTER_STRIDE) {
/* disable interrupts for both cpu's */
- writel(0, sync_regs +
+ writel(0xffffffffu, sync_regs +
host1x_sync_syncpt_thresh_int_disable_r() +
reg);
@@ -276,6 +286,7 @@ static const struct nvhost_intr_ops host1x_intr_ops = {
.set_host_clocks_per_usec = t20_intr_set_host_clocks_per_usec,
.set_syncpt_threshold = t20_intr_set_syncpt_threshold,
.enable_syncpt_intr = t20_intr_enable_syncpt_intr,
+ .disable_syncpt_intr = t20_intr_disable_syncpt_intr,
.disable_all_syncpt_intrs = t20_intr_disable_all_syncpt_intrs,
.request_host_general_irq = t20_intr_request_host_general_irq,
.free_host_general_irq = t20_intr_free_host_general_irq,
diff --git a/drivers/video/tegra/host/mpe/mpe.c b/drivers/video/tegra/host/mpe/mpe.c
index c738700469c6..d76ee0108eef 100644
--- a/drivers/video/tegra/host/mpe/mpe.c
+++ b/drivers/video/tegra/host/mpe/mpe.c
@@ -212,7 +212,7 @@ struct save_info {
unsigned int restore_count;
};
-static void __init save_begin(struct host1x_hwctx_handler *h, u32 *ptr)
+static void save_begin(struct host1x_hwctx_handler *h, u32 *ptr)
{
/* MPE: when done, increment syncpt to base+1 */
ptr[0] = nvhost_opcode_setclass(NV_VIDEO_ENCODE_MPEG_CLASS_ID, 0, 0);
@@ -229,7 +229,7 @@ static void __init save_begin(struct host1x_hwctx_handler *h, u32 *ptr)
}
#define SAVE_BEGIN_SIZE 5
-static void __init save_direct(u32 *ptr, u32 start_reg, u32 count)
+static void save_direct(u32 *ptr, u32 start_reg, u32 count)
{
ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
host1x_uclass_indoff_r(), 1);
@@ -239,7 +239,7 @@ static void __init save_direct(u32 *ptr, u32 start_reg, u32 count)
}
#define SAVE_DIRECT_SIZE 3
-static void __init save_set_ram_cmd(u32 *ptr, u32 cmd_reg, u32 count)
+static void save_set_ram_cmd(u32 *ptr, u32 cmd_reg, u32 count)
{
ptr[0] = nvhost_opcode_setclass(NV_VIDEO_ENCODE_MPEG_CLASS_ID,
cmd_reg, 1);
@@ -247,7 +247,7 @@ static void __init save_set_ram_cmd(u32 *ptr, u32 cmd_reg, u32 count)
}
#define SAVE_SET_RAM_CMD_SIZE 2
-static void __init save_read_ram_data_nasty(u32 *ptr, u32 data_reg)
+static void save_read_ram_data_nasty(u32 *ptr, u32 data_reg)
{
ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
host1x_uclass_indoff_r(), 1);
@@ -261,7 +261,7 @@ static void __init save_read_ram_data_nasty(u32 *ptr, u32 data_reg)
}
#define SAVE_READ_RAM_DATA_NASTY_SIZE 5
-static void __init save_end(struct host1x_hwctx_handler *h, u32 *ptr)
+static void save_end(struct host1x_hwctx_handler *h, u32 *ptr)
{
/* Wait for context read service to finish (cpu incr 3) */
ptr[0] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
@@ -275,7 +275,7 @@ static void __init save_end(struct host1x_hwctx_handler *h, u32 *ptr)
}
#define SAVE_END_SIZE 5
-static void __init setup_save_regs(struct save_info *info,
+static void setup_save_regs(struct save_info *info,
const struct hwctx_reginfo *regs,
unsigned int nr_regs)
{
@@ -304,7 +304,7 @@ static void __init setup_save_regs(struct save_info *info,
info->restore_count = restore_count;
}
-static void __init setup_save_ram_nasty(struct save_info *info, unsigned words,
+static void setup_save_ram_nasty(struct save_info *info, unsigned words,
unsigned cmd_reg, unsigned data_reg)
{
u32 *ptr = info->ptr;
@@ -330,7 +330,7 @@ static void __init setup_save_ram_nasty(struct save_info *info, unsigned words,
info->restore_count = restore_count;
}
-static void __init setup_save(struct host1x_hwctx_handler *h, u32 *ptr)
+static void setup_save(struct host1x_hwctx_handler *h, u32 *ptr)
{
struct save_info info = {
ptr,
@@ -553,7 +553,7 @@ struct nvhost_hwctx_handler *nvhost_mpe_ctxhandler_init(u32 syncpt,
p->save_buf = mem_op().alloc(memmgr, p->save_size * 4, 32,
mem_mgr_flag_write_combine);
- if (IS_ERR(p->save_buf)) {
+ if (IS_ERR_OR_NULL(p->save_buf)) {
p->save_buf = NULL;
return NULL;
}
diff --git a/drivers/video/tegra/host/nvhost_acm.c b/drivers/video/tegra/host/nvhost_acm.c
index 06005c423a21..5bde55ad2ff5 100644
--- a/drivers/video/tegra/host/nvhost_acm.c
+++ b/drivers/video/tegra/host/nvhost_acm.c
@@ -101,8 +101,17 @@ void nvhost_module_reset(struct nvhost_device *dev)
static void to_state_clockgated_locked(struct nvhost_device *dev)
{
+ struct nvhost_driver *drv = to_nvhost_driver(dev->dev.driver);
+
if (dev->powerstate == NVHOST_POWER_STATE_RUNNING) {
- int i;
+ int i, err;
+ if (drv->prepare_clockoff) {
+ err = drv->prepare_clockoff(dev);
+ if (err) {
+ dev_err(&dev->dev, "error clock gating");
+ return;
+ }
+ }
for (i = 0; i < dev->num_clks; i++)
clk_disable(dev->clk[i]);
if (dev->dev.parent)
@@ -141,6 +150,14 @@ static void to_state_running_locked(struct nvhost_device *dev)
}
}
+ /* Invoke callback after enabling clock. This is used for
+ * re-enabling host1x interrupts. */
+ if (prev_state == NVHOST_POWER_STATE_CLOCKGATED
+ && drv->finalize_clockon)
+ drv->finalize_clockon(dev);
+
+ /* Invoke callback after power un-gating. This is used for
+ * restoring context. */
if (prev_state == NVHOST_POWER_STATE_POWERGATED
&& drv->finalize_poweron)
drv->finalize_poweron(dev);
@@ -343,15 +360,17 @@ void nvhost_module_remove_client(struct nvhost_device *dev, void *priv)
{
int i;
struct nvhost_module_client *m;
+ int found = 0;
mutex_lock(&client_list_lock);
list_for_each_entry(m, &dev->client_list, node) {
if (priv == m->priv) {
list_del(&m->node);
+ found = 1;
break;
}
}
- if (m) {
+ if (found) {
kfree(m);
for (i = 0; i < dev->num_clks; i++)
nvhost_module_update_rate(dev, i);
diff --git a/drivers/video/tegra/host/nvhost_intr.c b/drivers/video/tegra/host/nvhost_intr.c
index 38a04f151e87..9788d32bd4a9 100644
--- a/drivers/video/tegra/host/nvhost_intr.c
+++ b/drivers/video/tegra/host/nvhost_intr.c
@@ -210,7 +210,9 @@ static int process_wait_list(struct nvhost_intr *intr,
remove_completed_waiters(&syncpt->wait_head, threshold, completed);
empty = list_empty(&syncpt->wait_head);
- if (!empty)
+ if (empty)
+ intr_op().disable_syncpt_intr(intr, syncpt->id);
+ else
reset_threshold_interrupt(intr, &syncpt->wait_head,
syncpt->id);
@@ -327,14 +329,20 @@ void *nvhost_intr_alloc_waiter()
GFP_KERNEL|__GFP_REPEAT);
}
-void nvhost_intr_put_ref(struct nvhost_intr *intr, void *ref)
+void nvhost_intr_put_ref(struct nvhost_intr *intr, u32 id, void *ref)
{
struct nvhost_waitlist *waiter = ref;
+ struct nvhost_intr_syncpt *syncpt;
+ struct nvhost_master *host = intr_to_dev(intr);
while (atomic_cmpxchg(&waiter->state,
WLS_PENDING, WLS_CANCELLED) == WLS_REMOVED)
schedule();
+ syncpt = intr->syncpt + id;
+ (void)process_wait_list(intr, syncpt,
+ nvhost_syncpt_update_min(&host->syncpt, id));
+
kref_put(&waiter->refcount, waiter_release);
}
diff --git a/drivers/video/tegra/host/nvhost_intr.h b/drivers/video/tegra/host/nvhost_intr.h
index cf0b6b9e8934..d4a6157eced1 100644
--- a/drivers/video/tegra/host/nvhost_intr.h
+++ b/drivers/video/tegra/host/nvhost_intr.h
@@ -104,7 +104,7 @@ void *nvhost_intr_alloc_waiter(void);
* You must call this if you passed non-NULL as ref.
* @ref the ref returned from nvhost_intr_add_action()
*/
-void nvhost_intr_put_ref(struct nvhost_intr *intr, void *ref);
+void nvhost_intr_put_ref(struct nvhost_intr *intr, u32 id, void *ref);
int nvhost_intr_init(struct nvhost_intr *intr, u32 irq_gen, u32 irq_sync);
void nvhost_intr_deinit(struct nvhost_intr *intr);
diff --git a/drivers/video/tegra/host/nvhost_job.c b/drivers/video/tegra/host/nvhost_job.c
index f93d7df1a552..f0f7e64d4504 100644
--- a/drivers/video/tegra/host/nvhost_job.c
+++ b/drivers/video/tegra/host/nvhost_job.c
@@ -34,19 +34,27 @@
/* Magic to use to fill freed handle slots */
#define BAD_MAGIC 0xdeadbeef
-static int job_size(struct nvhost_submit_hdr_ext *hdr)
+static size_t job_size(struct nvhost_submit_hdr_ext *hdr)
{
- int num_relocs = hdr ? hdr->num_relocs : 0;
- int num_waitchks = hdr ? hdr->num_waitchks : 0;
- int num_cmdbufs = hdr ? hdr->num_cmdbufs : 0;
- int num_unpins = num_cmdbufs + num_relocs;
+ s64 num_relocs = hdr ? (int)hdr->num_relocs : 0;
+ s64 num_waitchks = hdr ? (int)hdr->num_waitchks : 0;
+ s64 num_cmdbufs = hdr ? (int)hdr->num_cmdbufs : 0;
+ s64 num_unpins = num_cmdbufs + num_relocs;
+ s64 total;
- return sizeof(struct nvhost_job)
+ if(num_relocs < 0 || num_waitchks < 0 || num_cmdbufs < 0)
+ return 0;
+
+ total = sizeof(struct nvhost_job)
+ num_relocs * sizeof(struct nvhost_reloc)
+ num_relocs * sizeof(struct nvhost_reloc_shift)
+ num_unpins * sizeof(struct mem_handle *)
+ num_waitchks * sizeof(struct nvhost_waitchk)
+ num_cmdbufs * sizeof(struct nvhost_job_gather);
+
+ if(total > ULONG_MAX)
+ return 0;
+ return (size_t)total;
}
static void init_fields(struct nvhost_job *job,
@@ -63,7 +71,11 @@ static void init_fields(struct nvhost_job *job,
job->priority = priority;
job->clientid = clientid;
- /* Redistribute memory to the structs */
+ /*
+ * Redistribute memory to the structs.
+ * Overflows and negative conditions have
+ * already been checked in job_alloc().
+ */
mem += sizeof(struct nvhost_job);
job->relocarray = num_relocs ? mem : NULL;
mem += num_relocs * sizeof(struct nvhost_reloc);
@@ -91,8 +103,11 @@ struct nvhost_job *nvhost_job_alloc(struct nvhost_channel *ch,
int clientid)
{
struct nvhost_job *job = NULL;
+ size_t size = job_size(hdr);
- job = vzalloc(job_size(hdr));
+ if(!size)
+ goto error;
+ job = vzalloc(size);
if (!job)
goto error;
diff --git a/drivers/video/tegra/host/nvhost_syncpt.c b/drivers/video/tegra/host/nvhost_syncpt.c
index 9fa7d0652c1f..38c28ca116e7 100644
--- a/drivers/video/tegra/host/nvhost_syncpt.c
+++ b/drivers/video/tegra/host/nvhost_syncpt.c
@@ -235,7 +235,7 @@ int nvhost_syncpt_wait_timeout(struct nvhost_syncpt *sp, u32 id,
check_count++;
}
}
- nvhost_intr_put_ref(&(syncpt_to_dev(sp)->intr), ref);
+ nvhost_intr_put_ref(&(syncpt_to_dev(sp)->intr), id, ref);
done:
nvhost_module_idle(syncpt_to_dev(sp)->dev);
@@ -344,7 +344,7 @@ static ssize_t syncpt_min_show(struct kobject *kobj,
struct nvhost_syncpt_attr *syncpt_attr =
container_of(attr, struct nvhost_syncpt_attr, attr);
- return snprintf(buf, PAGE_SIZE, "%d",
+ return snprintf(buf, PAGE_SIZE, "%u",
nvhost_syncpt_read(&syncpt_attr->host->syncpt,
syncpt_attr->id));
}
@@ -355,7 +355,7 @@ static ssize_t syncpt_max_show(struct kobject *kobj,
struct nvhost_syncpt_attr *syncpt_attr =
container_of(attr, struct nvhost_syncpt_attr, attr);
- return snprintf(buf, PAGE_SIZE, "%d",
+ return snprintf(buf, PAGE_SIZE, "%u",
nvhost_syncpt_read_max(&syncpt_attr->host->syncpt,
syncpt_attr->id));
}
diff --git a/drivers/video/tegra/host/t30/t30.c b/drivers/video/tegra/host/t30/t30.c
index 0c8d626a4d67..334d598d5c0b 100644
--- a/drivers/video/tegra/host/t30/t30.c
+++ b/drivers/video/tegra/host/t30/t30.c
@@ -142,7 +142,7 @@ static struct nvhost_device tegra_gr2d02_device = {
.waitbases = BIT(NVWAITBASE_2D_0) | BIT(NVWAITBASE_2D_1),
.modulemutexes = BIT(NVMODMUTEX_2D_FULL) | BIT(NVMODMUTEX_2D_SIMPLE) |
BIT(NVMODMUTEX_2D_SB_A) | BIT(NVMODMUTEX_2D_SB_B),
- .clocks = { {"gr2d", UINT_MAX},
+ .clocks = { {"gr2d", 0},
{"epp", 0},
{"emc", 300000000} },
NVHOST_MODULE_NO_POWERGATE_IDS,