summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorChris Johnson <cwj@nvidia.com>2011-08-12 09:04:09 +0300
committerDan Willemsen <dwillemsen@nvidia.com>2011-11-30 21:48:21 -0800
commitb33dead2cc3e262000ba1915b19c27b6b0b87f41 (patch)
tree587b2f7261c8f1401e1aaa737d3f901eb8d3b49b /drivers
parent14dcbce77792e27a94adb242650c50809435af30 (diff)
video: tegra: nvhost: add submit timeout support
In this change, nvhost_cdma starts a timer (if a timeout is specified in the userctx), for the buffer at the head of the sync_queue that has not reached its syncpt threshold. If the timeout fires, nvhost_cdma initiates a channel / module reset. It then detects up to where in the sync_queue it stopped execution (based on the current HW syncpt value). For any remaining uncompleted buffers in the context, nvhost_cdma NOPs the entry and CPU incrs the syncpt to where it should be had it completed. If one of the sync_queue entries belongs to another context, it still does the syncpt incrs for this context, but via the PB as a GATHER opcode. At the end, CDMA is restarted, so buffers are refetched (either with NOP slots, or GATHERs to incr syncpts). This appears as though the buffer has completed (and the associated resources released). For testing, debugfs entries have been added under /d/tegra_nvhost force_timeout_val - set the timeout value, in ms force_timeout_channel - channel ID, where timeout checks occur force_timeout_pid - process ID to set the userctx The idea is to set the timeout_val, then the timeout_channel (e.g. for 3D, the channel ID is 1) and then the process ID, gotten from running adb shell ps. Bug 625545 Original-Change-Id: I659e9255f1105f3439ce23e9169a19739b83ea52 Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-on: http://git-master/r/42655 Reviewed-by: Scott Williams <scwilliams@nvidia.com> Reviewed-by: Varun Colbert <vcolbert@nvidia.com> Tested-by: Varun Colbert <vcolbert@nvidia.com> Rebase-Id: R89759c129e2db8f7dbf83a6066fc29947f95cc27
Diffstat (limited to 'drivers')
-rw-r--r--drivers/video/tegra/host/chip_support.h21
-rw-r--r--drivers/video/tegra/host/debug.c10
-rw-r--r--drivers/video/tegra/host/debug.h5
-rw-r--r--drivers/video/tegra/host/dev.c31
-rw-r--r--drivers/video/tegra/host/dev.h8
-rw-r--r--drivers/video/tegra/host/nvhost_acm.c83
-rw-r--r--drivers/video/tegra/host/nvhost_acm.h2
-rw-r--r--drivers/video/tegra/host/nvhost_cdma.c371
-rw-r--r--drivers/video/tegra/host/nvhost_cdma.h51
-rw-r--r--drivers/video/tegra/host/nvhost_channel.c2
-rw-r--r--drivers/video/tegra/host/nvhost_channel.h4
-rw-r--r--drivers/video/tegra/host/nvhost_hwctx.h3
-rw-r--r--drivers/video/tegra/host/nvhost_intr.c2
-rw-r--r--drivers/video/tegra/host/nvhost_syncpt.c7
-rw-r--r--drivers/video/tegra/host/t20/3dctx_t20.c21
-rw-r--r--drivers/video/tegra/host/t20/cdma_t20.c444
-rw-r--r--drivers/video/tegra/host/t20/channel_t20.c63
-rw-r--r--drivers/video/tegra/host/t20/hardware_t20.h20
18 files changed, 1045 insertions, 103 deletions
diff --git a/drivers/video/tegra/host/chip_support.h b/drivers/video/tegra/host/chip_support.h
index eae99671a20d..16dd55dc0f2b 100644
--- a/drivers/video/tegra/host/chip_support.h
+++ b/drivers/video/tegra/host/chip_support.h
@@ -24,6 +24,7 @@
struct output;
struct nvhost_waitchk;
+struct nvhost_userctx_timeout;
struct nvhost_chip_support {
struct {
@@ -42,10 +43,12 @@ struct nvhost_chip_support {
int nr_unpins,
u32 syncpt_id,
u32 syncpt_incrs,
+ struct nvhost_userctx_timeout *timeout,
u32 *syncpt_value,
bool null_kickoff);
int (*read3dreg)(struct nvhost_channel *channel,
struct nvhost_hwctx *hwctx,
+ struct nvhost_userctx_timeout *timeout,
u32 offset,
u32 *value);
} channel;
@@ -54,6 +57,24 @@ struct nvhost_chip_support {
void (*start)(struct nvhost_cdma *);
void (*stop)(struct nvhost_cdma *);
void (*kick)(struct nvhost_cdma *);
+ int (*timeout_init)(struct nvhost_cdma *,
+ u32 syncpt_id);
+ void (*timeout_destroy)(struct nvhost_cdma *);
+ void (*timeout_teardown_begin)(struct nvhost_cdma *);
+ void (*timeout_teardown_end)(struct nvhost_cdma *,
+ u32 getptr);
+ void (*timeout_cpu_incr)(struct nvhost_cdma *,
+ u32 getptr,
+ u32 syncpt_incrs,
+ u32 nr_slots);
+ void (*timeout_pb_incr)(struct nvhost_cdma *,
+ u32 getptr,
+ u32 syncpt_incrs,
+ u32 nr_slots,
+ bool exec_ctxsave);
+ void (*timeout_clear_ctxsave)(struct nvhost_cdma *,
+ u32 getptr,
+ u32 nr_slots);
} cdma;
struct {
diff --git a/drivers/video/tegra/host/debug.c b/drivers/video/tegra/host/debug.c
index a7ff51aed08b..8892a0072480 100644
--- a/drivers/video/tegra/host/debug.c
+++ b/drivers/video/tegra/host/debug.c
@@ -27,6 +27,10 @@
pid_t nvhost_debug_null_kickoff_pid;
+pid_t nvhost_debug_force_timeout_pid;
+u32 nvhost_debug_force_timeout_val;
+u32 nvhost_debug_force_timeout_channel;
+
void nvhost_debug_output(struct output *o, const char* fmt, ...)
{
va_list args;
@@ -113,6 +117,12 @@ void nvhost_debug_init(struct nvhost_master *master)
&nvhost_debug_null_kickoff_pid);
nvhost_debug_scale_init(de);
+ debugfs_create_u32("force_timeout_pid", S_IRUGO|S_IWUSR, de,
+ &nvhost_debug_force_timeout_pid);
+ debugfs_create_u32("force_timeout_val", S_IRUGO|S_IWUSR, de,
+ &nvhost_debug_force_timeout_val);
+ debugfs_create_u32("force_timeout_channel", S_IRUGO|S_IWUSR, de,
+ &nvhost_debug_force_timeout_channel);
}
#else
void nvhost_debug_init(struct nvhost_master *master)
diff --git a/drivers/video/tegra/host/debug.h b/drivers/video/tegra/host/debug.h
index 81017fe8d2a1..d3adcc60cf04 100644
--- a/drivers/video/tegra/host/debug.h
+++ b/drivers/video/tegra/host/debug.h
@@ -23,6 +23,7 @@
#define __NVHOST_DEBUG_H
#include <linux/debugfs.h>
+#include <linux/seq_file.h>
struct output {
void (*fn)(void *ctx, const char* str, size_t len);
@@ -43,4 +44,8 @@ static inline void write_to_printk(void *ctx, const char* str, size_t len)
void nvhost_debug_output(struct output *o, const char* fmt, ...);
void nvhost_debug_scale_init(struct dentry *de);
+extern pid_t nvhost_debug_force_timeout_pid;
+extern u32 nvhost_debug_force_timeout_val;
+extern u32 nvhost_debug_force_timeout_channel;
+
#endif /*__NVHOST_DEBUG_H */
diff --git a/drivers/video/tegra/host/dev.c b/drivers/video/tegra/host/dev.c
index 37d18bc2d175..5869e6f9805d 100644
--- a/drivers/video/tegra/host/dev.c
+++ b/drivers/video/tegra/host/dev.c
@@ -43,6 +43,7 @@
#include <mach/gpufuse.h>
#include "nvhost_scale.h"
+#include "debug.h"
#define DRIVER_NAME "tegra_grhost"
#define IFACE_NAME "nvhost"
@@ -64,6 +65,7 @@ struct nvhost_channel_userctx {
struct nvmap_client *nvmap;
struct nvhost_waitchk waitchks[NVHOST_MAX_WAIT_CHECKS];
struct nvhost_waitchk *cur_waitchk;
+ struct nvhost_userctx_timeout timeout;
};
struct nvhost_ctrl_userctx {
@@ -126,6 +128,7 @@ static int nvhost_channelopen(struct inode *inode, struct file *filp)
priv->hwctx = ch->ctxhandler.alloc(ch);
if (!priv->hwctx)
goto fail;
+ priv->hwctx->timeout = &priv->timeout;
}
priv->gathers = nvmap_mmap(priv->gather_mem);
@@ -312,6 +315,12 @@ static int nvhost_ioctl_channel_flush(
if (nvhost_debug_null_kickoff_pid == current->tgid)
null_kickoff = 1;
+ if ((nvhost_debug_force_timeout_pid == current->tgid) &&
+ (nvhost_debug_force_timeout_channel == ctx->ch->chid)) {
+ ctx->timeout.timeout = nvhost_debug_force_timeout_val;
+ }
+ ctx->timeout.syncpt_id = ctx->hdr.syncpt_id;
+
/* context switch if needed, and submit user's gathers to the channel */
BUG_ON(!channel_op(ctx->ch).submit);
err = channel_op(ctx->ch).submit(ctx->ch, ctx->hwctx, ctx->nvmap,
@@ -320,6 +329,7 @@ static int nvhost_ioctl_channel_flush(
ctx->hdr.waitchk_mask,
ctx->unpinarray, num_unpin,
ctx->hdr.syncpt_id, ctx->hdr.syncpt_incrs,
+ &ctx->timeout,
&args->value,
null_kickoff);
if (err)
@@ -334,7 +344,8 @@ static int nvhost_ioctl_channel_read_3d_reg(
{
BUG_ON(!channel_op(ctx->ch).read3dreg);
return channel_op(ctx->ch).read3dreg(ctx->ch, ctx->hwctx,
- args->offset, &args->value);
+ &ctx->timeout,
+ args->offset, &args->value);
}
static long nvhost_channelctl(struct file *filp,
@@ -447,6 +458,17 @@ static long nvhost_channelctl(struct file *filp,
err = nvhost_module_set_rate(&priv->ch->mod, priv, rate, 0);
break;
}
+ case NVHOST_IOCTL_CHANNEL_SET_TIMEOUT:
+ priv->timeout.timeout =
+ (u32)((struct nvhost_set_timeout_args *)buf)->timeout;
+ dev_dbg(&priv->ch->dev->pdev->dev,
+ "%s: setting buffer timeout (%d ms) for userctx 0x%p\n",
+ __func__, priv->timeout.timeout, priv);
+ break;
+ case NVHOST_IOCTL_CHANNEL_GET_TIMEDOUT:
+ ((struct nvhost_get_param_args *)buf)->value =
+ priv->timeout.has_timedout;
+ break;
default:
err = -ENOTTY;
break;
@@ -678,10 +700,6 @@ static void power_host(struct nvhost_module *mod, enum nvhost_power_action actio
if (action == NVHOST_POWER_ACTION_ON) {
nvhost_intr_start(&dev->intr, clk_get_rate(mod->clk[0]));
- /* don't do it, as display may have changed syncpt
- * after the last save
- * nvhost_syncpt_reset(&dev->syncpt);
- */
} else if (action == NVHOST_POWER_ACTION_OFF) {
int i;
for (i = 0; i < dev->nb_channels; i++)
@@ -939,7 +957,6 @@ static int __devinit nvhost_probe(struct platform_device *pdev)
}
}
-
err = nvhost_cpuaccess_init(&host->cpuaccess, pdev);
if (err)
goto fail;
@@ -976,7 +993,6 @@ fail:
nvhost_remove_chip_support(host);
if (host->nvmap)
nvmap_client_put(host->nvmap);
- /* TODO: [ahatala 2010-05-04] */
kfree(host);
return err;
}
@@ -986,7 +1002,6 @@ static int __exit nvhost_remove(struct platform_device *pdev)
struct nvhost_master *host = platform_get_drvdata(pdev);
nvhost_remove_chip_support(host);
nvhost_remove_sysfs(&pdev->dev);
- /*kfree(host);?*/
return 0;
}
diff --git a/drivers/video/tegra/host/dev.h b/drivers/video/tegra/host/dev.h
index ee79bddf05d5..02b248b38d62 100644
--- a/drivers/video/tegra/host/dev.h
+++ b/drivers/video/tegra/host/dev.h
@@ -31,6 +31,7 @@
#include "chip_support.h"
#define NVHOST_MAJOR 0 /* dynamic */
+struct nvhost_hwctx;
struct nvhost_master {
void __iomem *aperture;
@@ -56,6 +57,13 @@ struct nvhost_master {
struct nvhost_chip_support op;
};
+struct nvhost_userctx_timeout {
+ u32 timeout;
+ bool has_timedout;
+ struct nvhost_hwctx *hwctx;
+ int syncpt_id;
+};
+
void nvhost_debug_init(struct nvhost_master *master);
void nvhost_debug_dump(struct nvhost_master *master);
diff --git a/drivers/video/tegra/host/nvhost_acm.c b/drivers/video/tegra/host/nvhost_acm.c
index 164617c7229d..9caaf4817257 100644
--- a/drivers/video/tegra/host/nvhost_acm.c
+++ b/drivers/video/tegra/host/nvhost_acm.c
@@ -26,6 +26,7 @@
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/device.h>
+#include <linux/delay.h>
#include <mach/powergate.h>
#include <mach/clk.h>
#include <mach/hardware.h>
@@ -34,6 +35,47 @@
#define ACM_POWERDOWN_HANDLER_DELAY_MSEC 25
#define ACM_SUSPEND_WAIT_FOR_IDLE_TIMEOUT (2 * HZ)
+#define POWERGATE_DELAY 10
+
+void nvhost_module_reset(struct nvhost_module *mod)
+{
+ struct nvhost_master *dev;
+ dev = container_of(mod, struct nvhost_channel, mod)->dev;
+
+ dev_dbg(&dev->pdev->dev,
+ "%s: asserting %s module reset (id %d, id2 %d)\n",
+ __func__, mod->name,
+ mod->powergate_id, mod->powergate_id2);
+
+ /* assert module and mc client reset */
+ if (mod->powergate_id != -1) {
+ tegra_powergate_mc_disable(mod->powergate_id);
+ tegra_periph_reset_assert(mod->clk[0]);
+ tegra_powergate_mc_flush(mod->powergate_id);
+ }
+ if (mod->powergate_id2 != -1) {
+ tegra_powergate_mc_disable(mod->powergate_id2);
+ tegra_periph_reset_assert(mod->clk[1]);
+ tegra_powergate_mc_flush(mod->powergate_id2);
+ }
+
+ udelay(POWERGATE_DELAY);
+
+ /* deassert reset */
+ if (mod->powergate_id != -1) {
+ tegra_powergate_mc_flush_done(mod->powergate_id);
+ tegra_periph_reset_deassert(mod->clk[0]);
+ tegra_powergate_mc_enable(mod->powergate_id);
+ }
+ if (mod->powergate_id2 != -1) {
+ tegra_powergate_mc_flush_done(mod->powergate_id2);
+ tegra_periph_reset_deassert(mod->clk[1]);
+ tegra_powergate_mc_enable(mod->powergate_id2);
+ }
+
+ dev_dbg(&dev->pdev->dev, "%s: module %s out of reset\n",
+ __func__, mod->name);
+}
void nvhost_module_busy(struct nvhost_module *mod)
{
@@ -43,13 +85,15 @@ void nvhost_module_busy(struct nvhost_module *mod)
int i = 0;
if (mod->parent)
nvhost_module_busy(mod->parent);
- if (mod->powergate_id != -1)
- tegra_unpowergate_partition(mod->powergate_id);
- if (mod->powergate_id2 != -1)
- tegra_unpowergate_partition(mod->powergate_id2);
+ if (mod->can_powergate) {
+ if (mod->powergate_id != -1)
+ tegra_unpowergate_partition(mod->powergate_id);
+ if (mod->powergate_id2 != -1)
+ tegra_unpowergate_partition(mod->powergate_id2);
+ }
while (i < mod->num_clks)
clk_enable(mod->clk[i++]);
- if (mod->func)
+ if (mod->can_powergate && mod->func)
mod->func(mod, NVHOST_POWER_ACTION_ON);
mod->powered = true;
}
@@ -68,12 +112,12 @@ static void powerdown_handler(struct work_struct *work)
mod->func(mod, NVHOST_POWER_ACTION_OFF);
for (i = 0; i < mod->num_clks; i++)
clk_disable(mod->clk[i]);
- if (mod->powergate_id != -1)
- tegra_powergate_partition(mod->powergate_id);
-
- if (mod->powergate_id2 != -1)
- tegra_powergate_partition(mod->powergate_id2);
-
+ if (mod->can_powergate) {
+ if (mod->powergate_id != -1)
+ tegra_powergate_partition(mod->powergate_id);
+ if (mod->powergate_id2 != -1)
+ tegra_powergate_partition(mod->powergate_id2);
+ }
mod->powered = false;
if (mod->parent)
nvhost_module_idle(mod->parent);
@@ -493,23 +537,30 @@ int nvhost_module_init(struct nvhost_module *mod, const char *name,
mod->num_clks = i;
mod->func = func;
mod->parent = parent;
+ mod->can_powergate = false;
mod->powered = false;
mod->powergate_id = -1;
mod->powergate_id2 = -1;
+ mod->powerdown_delay = ACM_POWERDOWN_HANDLER_DELAY_MSEC;
+
if (strcmp(name, "gr2d") == 0)
mod->powerdown_delay = 0;
- else
- mod->powerdown_delay = ACM_POWERDOWN_HANDLER_DELAY_MSEC;
-
- if (strcmp(name, "gr3d") == 0) {
+ else if (strcmp(name, "gr3d") == 0) {
+ mod->can_powergate = !_3d_powergating_disabled();
if (!scale3d.init)
scale3d_init(mod);
mod->powergate_id = TEGRA_POWERGATE_3D;
+ if (!mod->can_powergate)
+ tegra_unpowergate_partition(mod->powergate_id);
#ifdef CONFIG_ARCH_TEGRA_3x_SOC
mod->powergate_id2 = TEGRA_POWERGATE_3D1;
+ if (!mod->can_powergate)
+ tegra_unpowergate_partition(mod->powergate_id2);
#endif
- } else if (strcmp(name, "mpe") == 0)
+ } else if (strcmp(name, "mpe") == 0) {
+ mod->can_powergate = true;
mod->powergate_id = TEGRA_POWERGATE_MPE;
+ }
if (mod->powergate_id == TEGRA_POWERGATE_MPE
&& _mpe_powergating_disabled()) {
diff --git a/drivers/video/tegra/host/nvhost_acm.h b/drivers/video/tegra/host/nvhost_acm.h
index 6f3011e343cd..42bc89ab9d4b 100644
--- a/drivers/video/tegra/host/nvhost_acm.h
+++ b/drivers/video/tegra/host/nvhost_acm.h
@@ -56,6 +56,7 @@ struct nvhost_module {
atomic_t refcount;
wait_queue_head_t idle;
struct nvhost_module *parent;
+ bool can_powergate;
int powergate_id;
int powergate_id2;
int powerdown_delay;
@@ -68,6 +69,7 @@ int nvhost_module_init(struct nvhost_module *mod, const char *name,
void nvhost_module_deinit(struct nvhost_module *mod);
void nvhost_module_suspend(struct nvhost_module *mod, bool system_suspend);
+void nvhost_module_reset(struct nvhost_module *mod);
void nvhost_module_busy(struct nvhost_module *mod);
void nvhost_module_idle_mult(struct nvhost_module *mod, int refs);
int nvhost_module_add_client(struct nvhost_module *mod, void *priv);
diff --git a/drivers/video/tegra/host/nvhost_cdma.c b/drivers/video/tegra/host/nvhost_cdma.c
index fcce8334f272..b125f76414cc 100644
--- a/drivers/video/tegra/host/nvhost_cdma.c
+++ b/drivers/video/tegra/host/nvhost_cdma.c
@@ -23,8 +23,11 @@
#include "nvhost_cdma.h"
#include "dev.h"
#include <asm/cacheflush.h>
+
#include <linux/slab.h>
#include <trace/events/nvhost.h>
+#include <linux/interrupt.h>
+
/*
* TODO:
* stats
@@ -38,10 +41,13 @@
* The sync queue is a circular buffer of u32s interpreted as:
* 0: SyncPointID
* 1: SyncPointValue
- * 2: NumSlots (how many pushbuffer slots to free)
- * 3: NumHandles
- * 4: nvmap client which pinned the handles
- * 5..: NumHandles * nvmemhandle to unpin
+ * 2: FirstDMAGet (start of submit in pushbuffer)
+ * 3: Timeout (time to live for this submit)
+ * 4: TimeoutContext (userctx that submitted buffer)
+ * 5: NumSlots (how many pushbuffer slots to free)
+ * 6: NumHandles
+ * 7: nvmap client which pinned the handles
+ * 8..: NumHandles * nvmemhandle to unpin
*
* There's always one word unused, so (accounting for wrap):
* - Write == Read => queue empty
@@ -55,7 +61,7 @@
*/
/* Number of words needed to store an entry containing one handle */
-#define SYNC_QUEUE_MIN_ENTRY (4 + (2 * sizeof(void *) / sizeof(u32)))
+#define SYNC_QUEUE_MIN_ENTRY (SQ_IDX_HANDLES + (sizeof(void *)/4))
/**
* Reset to empty queue.
@@ -120,20 +126,46 @@ static unsigned int sync_queue_space(struct sync_queue *queue)
}
/**
+ * Debug routine used to dump sync_queue entries
+ */
+static void dump_sync_queue_entry(struct nvhost_cdma *cdma, u32 *entry)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+
+ dev_dbg(&dev->pdev->dev, "sync_queue index 0x%x\n",
+ (entry - cdma->sync_queue.buffer));
+ dev_dbg(&dev->pdev->dev, " SYNCPT_ID %d\n",
+ entry[SQ_IDX_SYNCPT_ID]);
+ dev_dbg(&dev->pdev->dev, " SYNCPT_VAL %d\n",
+ entry[SQ_IDX_SYNCPT_VAL]);
+ dev_dbg(&dev->pdev->dev, " FIRST_GET 0x%x\n",
+ entry[SQ_IDX_FIRST_GET]);
+ dev_dbg(&dev->pdev->dev, " TIMEOUT %d\n",
+ entry[SQ_IDX_TIMEOUT]);
+ dev_dbg(&dev->pdev->dev, " TIMEOUT_CTX 0x%x\n",
+ entry[SQ_IDX_TIMEOUT_CTX]);
+ dev_dbg(&dev->pdev->dev, " NUM_SLOTS %d\n",
+ entry[SQ_IDX_NUM_SLOTS]);
+ dev_dbg(&dev->pdev->dev, " NUM_HANDLES %d\n",
+ entry[SQ_IDX_NUM_HANDLES]);
+}
+
+/**
* Add an entry to the sync queue.
*/
-#define entry_size(_cnt) ((1 + _cnt)*sizeof(void *)/sizeof(u32))
+#define entry_size(_cnt) ((_cnt)*sizeof(void *)/sizeof(u32))
static void add_to_sync_queue(struct sync_queue *queue,
u32 sync_point_id, u32 sync_point_value,
u32 nr_slots, struct nvmap_client *user_nvmap,
- struct nvmap_handle **handles, u32 nr_handles)
+ struct nvmap_handle **handles, u32 nr_handles,
+ u32 first_get,
+ struct nvhost_userctx_timeout *timeout)
{
struct nvhost_cdma *cdma;
struct nvhost_master *host;
- u32 write = queue->write;
+ u32 size, write = queue->write;
u32 *p = queue->buffer + write;
- u32 size = 4 + (entry_size(nr_handles));
cdma = container_of(queue, struct nvhost_cdma, sync_queue);
host = cdma_to_dev(cdma);
@@ -141,20 +173,29 @@ static void add_to_sync_queue(struct sync_queue *queue,
BUG_ON(sync_point_id == NVSYNCPT_INVALID);
BUG_ON(sync_queue_space(queue) < nr_handles);
+ size = SQ_IDX_HANDLES;
+ size += entry_size(nr_handles);
+
write += size;
BUG_ON(write > host->sync_queue_size);
- *p++ = sync_point_id;
- *p++ = sync_point_value;
- *p++ = nr_slots;
- *p++ = nr_handles;
- BUG_ON(!user_nvmap);
- *(struct nvmap_client **)p = nvmap_client_get(user_nvmap);
+ p[SQ_IDX_SYNCPT_ID] = sync_point_id;
+ p[SQ_IDX_SYNCPT_VAL] = sync_point_value;
+ p[SQ_IDX_FIRST_GET] = first_get;
+ p[SQ_IDX_TIMEOUT] = timeout->timeout;
+ p[SQ_IDX_NUM_SLOTS] = nr_slots;
+ p[SQ_IDX_NUM_HANDLES] = nr_handles;
- p = (u32 *)((void *)p + sizeof(struct nvmap_client *));
+ *(void **)(&p[SQ_IDX_TIMEOUT_CTX]) = timeout;
- if (nr_handles)
- memcpy(p, handles, nr_handles * sizeof(struct nvmap_handle *));
+ BUG_ON(!user_nvmap);
+ *(struct nvmap_client **)(&p[SQ_IDX_NVMAP_CTX]) =
+ nvmap_client_get(user_nvmap);
+
+ if (nr_handles) {
+ memcpy(&p[SQ_IDX_HANDLES], handles,
+ (nr_handles * sizeof(struct nvmap_handle *)));
+ }
/* If there's not enough room for another entry, wrap to the start. */
if ((write + SYNC_QUEUE_MIN_ENTRY) > host->sync_queue_size) {
@@ -165,7 +206,6 @@ static void add_to_sync_queue(struct sync_queue *queue,
BUG_ON(queue->read == 0);
write = 0;
}
-
queue->write = write;
}
@@ -205,7 +245,8 @@ dequeue_sync_queue_head(struct sync_queue *queue)
BUG_ON(read == queue->write);
- size = 4 + entry_size(queue->buffer[read + 3]);
+ size = SQ_IDX_HANDLES;
+ size += entry_size(queue->buffer[read + SQ_IDX_NUM_HANDLES]);
read += size;
BUG_ON(read > host->sync_queue_size);
@@ -213,12 +254,9 @@ dequeue_sync_queue_head(struct sync_queue *queue)
/* If there's not enough room for another entry, wrap to the start. */
if ((read + SYNC_QUEUE_MIN_ENTRY) > host->sync_queue_size)
read = 0;
-
queue->read = read;
}
-
-
/**
* Return the status of the cdma's sync queue or push buffer for the given event
* - sq empty: returns 1 for empty, 0 for not empty (as in "1 empty queue" :-)
@@ -269,6 +307,40 @@ unsigned int nvhost_cdma_wait(struct nvhost_cdma *cdma, enum cdma_event event)
down(&cdma->sem);
mutex_lock(&cdma->lock);
}
+ return 0;
+}
+
+/**
+ * Start timer for a buffer submission that has not completed yet.
+ * Must be called with the cdma lock held.
+ */
+void nvhost_cdma_start_timer(struct nvhost_cdma *cdma, u32 syncpt_id,
+ u32 syncpt_val,
+ struct nvhost_userctx_timeout *timeout)
+{
+ BUG_ON(!timeout);
+ if (cdma->timeout.ctx_timeout) {
+ /* timer already started */
+ return;
+ }
+
+ cdma->timeout.ctx_timeout = timeout;
+ cdma->timeout.syncpt_id = syncpt_id;
+ cdma->timeout.syncpt_val = syncpt_val;
+ cdma->timeout.start_ktime = ktime_get();
+
+ schedule_delayed_work(&cdma->timeout.wq,
+ msecs_to_jiffies(timeout->timeout));
+}
+
+/**
+ * Stop timer when a buffer submission completes.
+ * Must be called with the cdma lock held.
+ */
+static void stop_cdma_timer(struct nvhost_cdma *cdma)
+{
+ cancel_delayed_work(&cdma->timeout.wq);
+ cdma->timeout.ctx_timeout = NULL;
}
/**
@@ -294,7 +366,10 @@ static void update_cdma(struct nvhost_cdma *cdma)
*/
for (;;) {
u32 syncpt_id, syncpt_val;
+ u32 timeout;
+ struct nvhost_userctx_timeout *timeout_ref = NULL;
unsigned int nr_slots, nr_handles;
+ struct nvhost_syncpt *sp = &dev->syncpt;
struct nvmap_handle **handles;
struct nvmap_client *nvmap;
u32 *sync;
@@ -306,26 +381,37 @@ static void update_cdma(struct nvhost_cdma *cdma)
break;
}
- syncpt_id = *sync++;
- syncpt_val = *sync++;
+ syncpt_id = sync[SQ_IDX_SYNCPT_ID];
+ syncpt_val = sync[SQ_IDX_SYNCPT_VAL];
+ timeout = sync[SQ_IDX_TIMEOUT];
+ timeout_ref = (struct nvhost_userctx_timeout *)
+ sync[SQ_IDX_TIMEOUT_CTX];
BUG_ON(syncpt_id == NVSYNCPT_INVALID);
/* Check whether this syncpt has completed, and bail if not */
- if (!nvhost_syncpt_min_cmp(&dev->syncpt, syncpt_id, syncpt_val))
+ if (!nvhost_syncpt_min_cmp(sp, syncpt_id, syncpt_val)) {
+ /* Start timer on next pending syncpt */
+ if (timeout) {
+ nvhost_cdma_start_timer(cdma, syncpt_id,
+ syncpt_val, timeout_ref);
+ }
break;
+ }
- nr_slots = *sync++;
- nr_handles = *sync++;
- nvmap = *(struct nvmap_client **)sync;
- sync = ((void *)sync + sizeof(struct nvmap_client *));
- handles = (struct nvmap_handle **)sync;
+ /* Cancel timeout, when a buffer completes */
+ if (cdma->timeout.ctx_timeout)
+ stop_cdma_timer(cdma);
+
+ nr_slots = sync[SQ_IDX_NUM_SLOTS];
+ nr_handles = sync[SQ_IDX_NUM_HANDLES];
+ nvmap = (struct nvmap_client *)sync[SQ_IDX_NVMAP_CTX];
+ handles = (struct nvmap_handle **)&sync[SQ_IDX_HANDLES];
BUG_ON(!nvmap);
/* Unpin the memory */
nvmap_unpin_handles(nvmap, handles, nr_handles);
-
nvmap_client_put(nvmap);
/* Pop push buffer slots */
@@ -349,6 +435,168 @@ static void update_cdma(struct nvhost_cdma *cdma)
}
}
+static u32 *advance_next_entry(struct nvhost_cdma *cdma, u32 *read)
+{
+ struct nvhost_master *host;
+ u32 ridx;
+
+ host = cdma_to_dev(cdma);
+
+ /* move sync_queue read ptr to next entry */
+ ridx = (read - cdma->sync_queue.buffer);
+ ridx += (SQ_IDX_HANDLES + entry_size(read[SQ_IDX_NUM_HANDLES]));
+ if ((ridx + SYNC_QUEUE_MIN_ENTRY) > host->sync_queue_size)
+ ridx = 0;
+
+ /* return sync_queue entry */
+ return cdma->sync_queue.buffer + ridx;
+}
+
+void nvhost_cdma_update_sync_queue(struct nvhost_cdma *cdma,
+ struct nvhost_syncpt *syncpt, struct device *dev)
+{
+ u32 first_get, get_restart;
+ u32 syncpt_incrs, nr_slots;
+ bool clear_ctxsave, exec_ctxsave;
+ struct sync_queue *queue = &cdma->sync_queue;
+ u32 *sync = sync_queue_head(queue);
+ u32 syncpt_val = nvhost_syncpt_update_min(syncpt,
+ cdma->timeout.syncpt_id);
+
+ dev_dbg(dev,
+ "%s: starting cleanup (thresh %d, queue rd 0x%x wr 0x%x)\n",
+ __func__,
+ syncpt_val, queue->read, queue->write);
+
+ /*
+ * Move the sync_queue read pointer to the first entry that hasn't
+ * completed based on the current HW syncpt value. It's likely there
+ * won't be any (i.e. we're still at the head), but covers the case
+ * where a syncpt incr happens just prior/during the teardown.
+ */
+
+ dev_dbg(dev,
+ "%s: skip completed buffers still in sync_queue\n",
+ __func__);
+
+ while (sync != (queue->buffer + queue->write)) {
+ /* move read ptr to first blocked entry */
+ if (syncpt_val < sync[SQ_IDX_SYNCPT_VAL])
+ break; /* not completed */
+
+ dump_sync_queue_entry(cdma, sync);
+ sync = advance_next_entry(cdma, sync);
+ }
+
+ /*
+ * Walk the sync_queue, first incrementing with the CPU syncpts that
+ * are partially executed (the first buffer) or fully skipped while
+ * still in the current context (slots are also NOP-ed).
+ *
+ * At the point contexts are interleaved, syncpt increments must be
+ * done inline with the pushbuffer from a GATHER buffer to maintain
+ * the order (slots are modified to be a GATHER of syncpt incrs).
+ *
+ * Note: save in get_restart the location where the timed out buffer
+ * started in the PB, so we can start the refetch from there (with the
+ * modified NOP-ed PB slots). This lets things appear to have completed
+ * properly for this buffer and resources are freed.
+ */
+
+ dev_dbg(dev,
+ "%s: perform CPU incr on pending same ctx buffers\n",
+ __func__);
+
+ get_restart = cdma->last_put;
+ if (sync != (queue->buffer + queue->write))
+ get_restart = sync[SQ_IDX_FIRST_GET];
+
+ /* do CPU increments */
+ while (sync != (queue->buffer + queue->write)) {
+
+ /* different context, gets us out of this loop */
+ if ((void *)sync[SQ_IDX_TIMEOUT_CTX] !=
+ cdma->timeout.ctx_timeout)
+ break;
+
+ syncpt_incrs = (sync[SQ_IDX_SYNCPT_VAL] - syncpt_val);
+ first_get = sync[SQ_IDX_FIRST_GET];
+ nr_slots = sync[SQ_IDX_NUM_SLOTS];
+
+ /* won't need a timeout when replayed */
+ sync[SQ_IDX_TIMEOUT] = 0;
+
+ dev_dbg(dev,
+ "%s: CPU incr (%d)\n", __func__, syncpt_incrs);
+
+ dump_sync_queue_entry(cdma, sync);
+
+ /* safe to use CPU to incr syncpts */
+ cdma_op(cdma).timeout_cpu_incr(cdma, first_get,
+ syncpt_incrs, nr_slots);
+ syncpt_val += syncpt_incrs;
+ sync = advance_next_entry(cdma, sync);
+ }
+
+ dev_dbg(dev,
+ "%s: GPU incr blocked interleaved ctx buffers\n",
+ __func__);
+
+ clear_ctxsave = true;
+ exec_ctxsave = false;
+
+ /* setup GPU increments */
+ while (sync != (queue->buffer + queue->write)) {
+
+ syncpt_incrs = (sync[SQ_IDX_SYNCPT_VAL] - syncpt_val);
+ first_get = sync[SQ_IDX_FIRST_GET];
+ nr_slots = sync[SQ_IDX_NUM_SLOTS];
+
+ /* same context, increment in the pushbuffer */
+ if ((void *)sync[SQ_IDX_TIMEOUT_CTX] ==
+ cdma->timeout.ctx_timeout) {
+
+ /* won't need a timeout when replayed */
+ sync[SQ_IDX_TIMEOUT] = 0;
+
+ /* update buffer's syncpts in the pushbuffer */
+ cdma_op(cdma).timeout_pb_incr(cdma, first_get,
+ syncpt_incrs, nr_slots, exec_ctxsave);
+
+ clear_ctxsave = true;
+ exec_ctxsave = false;
+ } else {
+ dev_dbg(dev,
+ "%s: switch to a different userctx\n",
+ __func__);
+ /*
+ * If previous context was the timed out context
+ * then clear its CTXSAVE in this slot.
+ */
+ if (clear_ctxsave) {
+ cdma_op(cdma).timeout_clear_ctxsave(cdma,
+ first_get, nr_slots);
+ clear_ctxsave = false;
+ }
+ exec_ctxsave = true;
+ }
+
+ dump_sync_queue_entry(cdma, sync);
+
+ syncpt_val = sync[SQ_IDX_SYNCPT_VAL];
+ sync = advance_next_entry(cdma, sync);
+ }
+
+ dev_dbg(dev,
+ "%s: finished sync_queue modification\n", __func__);
+
+ /* roll back DMAGET and start up channel again */
+ cdma_op(cdma).timeout_teardown_end(cdma, get_restart);
+
+ cdma->timeout.ctx_timeout->has_timedout = true;
+ mutex_unlock(&cdma->lock);
+}
+
/**
* Create a cdma
*/
@@ -361,6 +609,7 @@ int nvhost_cdma_init(struct nvhost_cdma *cdma)
sema_init(&cdma->sem, 0);
cdma->event = CDMA_EVENT_NONE;
cdma->running = false;
+ cdma->torndown = false;
/* allocate sync queue memory */
cdma->sync_queue.buffer = kzalloc(cdma_to_dev(cdma)->sync_queue_size
@@ -381,25 +630,59 @@ int nvhost_cdma_init(struct nvhost_cdma *cdma)
void nvhost_cdma_deinit(struct nvhost_cdma *cdma)
{
struct push_buffer *pb = &cdma->push_buffer;
+
BUG_ON(!cdma_pb_op(cdma).destroy);
BUG_ON(cdma->running);
kfree(cdma->sync_queue.buffer);
cdma->sync_queue.buffer = 0;
cdma_pb_op(cdma).destroy(pb);
+ cdma_op(cdma).timeout_destroy(cdma);
}
-
/**
* Begin a cdma submit
*/
-void nvhost_cdma_begin(struct nvhost_cdma *cdma)
+int nvhost_cdma_begin(struct nvhost_cdma *cdma,
+ struct nvhost_userctx_timeout *timeout)
{
- BUG_ON(!cdma_op(cdma).start);
mutex_lock(&cdma->lock);
- if (!cdma->running)
+
+ if (timeout && timeout->has_timedout) {
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ u32 min, max;
+
+ min = nvhost_syncpt_update_min(&dev->syncpt,
+ cdma->timeout.syncpt_id);
+ max = nvhost_syncpt_read_min(&dev->syncpt,
+ cdma->timeout.syncpt_id);
+
+ dev_dbg(&dev->pdev->dev,
+ "%s: skip timed out ctx submit (min = %d, max = %d)\n",
+ __func__, min, max);
+ mutex_unlock(&cdma->lock);
+ return -ETIMEDOUT;
+ }
+ if (timeout->timeout) {
+ /* init state on first submit with timeout value */
+ if (!cdma->timeout.initialized) {
+ int err;
+ BUG_ON(!cdma_op(cdma).timeout_init);
+ err = cdma_op(cdma).timeout_init(cdma,
+ timeout->syncpt_id);
+ if (err) {
+ mutex_unlock(&cdma->lock);
+ return err;
+ }
+ }
+ }
+ if (!cdma->running) {
+ BUG_ON(!cdma_op(cdma).start);
cdma_op(cdma).start(cdma);
+ }
cdma->slots_free = 0;
cdma->slots_used = 0;
+ cdma->first_get = cdma_pb_op(cdma).putptr(&cdma->push_buffer);
+ return 0;
}
/**
@@ -443,8 +726,11 @@ void nvhost_cdma_push_gather(struct nvhost_cdma *cdma,
void nvhost_cdma_end(struct nvhost_cdma *cdma,
struct nvmap_client *user_nvmap,
u32 sync_point_id, u32 sync_point_value,
- struct nvmap_handle **handles, unsigned int nr_handles)
+ struct nvmap_handle **handles, unsigned int nr_handles,
+ struct nvhost_userctx_timeout *timeout)
{
+ bool was_idle = (cdma->sync_queue.read == cdma->sync_queue.write);
+
BUG_ON(!cdma_op(cdma).kick);
cdma_op(cdma).kick(cdma);
@@ -459,15 +745,24 @@ void nvhost_cdma_end(struct nvhost_cdma *cdma,
/* Add reloc entries to sync queue (as many as will fit) */
if (count > nr_handles)
count = nr_handles;
+
add_to_sync_queue(&cdma->sync_queue, sync_point_id,
sync_point_value, cdma->slots_used,
- user_nvmap, handles, count);
+ user_nvmap, handles, count, cdma->first_get,
+ timeout);
+
/* NumSlots only goes in the first packet */
cdma->slots_used = 0;
handles += count;
nr_handles -= count;
}
+ /* start timer on idle -> active transitions */
+ if (timeout->timeout && was_idle) {
+ nvhost_cdma_start_timer(cdma, sync_point_id, sync_point_value,
+ timeout);
+ }
+
mutex_unlock(&cdma->lock);
}
diff --git a/drivers/video/tegra/host/nvhost_cdma.h b/drivers/video/tegra/host/nvhost_cdma.h
index 45c2f7c57a7f..8bdc18b90220 100644
--- a/drivers/video/tegra/host/nvhost_cdma.h
+++ b/drivers/video/tegra/host/nvhost_cdma.h
@@ -31,6 +31,9 @@
#include "nvhost_acm.h"
+struct nvhost_syncpt;
+struct nvhost_userctx_timeout;
+
/*
* cdma
*
@@ -55,12 +58,42 @@ struct push_buffer {
struct nvmap_handle **handles; /* nvmap handle for each opcode pair */
};
+struct syncpt_buffer {
+ struct nvmap_handle_ref *mem; /* handle to pushbuffer memory */
+ u32 *mapped; /* mapped gather buffer (at channel offset */
+ u32 phys; /* physical address (at channel offset) */
+ u32 incr_per_buffer; /* max # of incrs per GATHER */
+ u32 words_per_incr; /* # of DWORDS in buffer to incr a syncpt */
+};
+
+enum sync_queue_idx {
+ SQ_IDX_SYNCPT_ID = 0,
+ SQ_IDX_SYNCPT_VAL = 1,
+ SQ_IDX_FIRST_GET = 2,
+ SQ_IDX_TIMEOUT = 3,
+ SQ_IDX_TIMEOUT_CTX = 4,
+ SQ_IDX_NUM_SLOTS = (SQ_IDX_TIMEOUT_CTX + sizeof(void *)/4),
+ SQ_IDX_NUM_HANDLES = (SQ_IDX_NUM_SLOTS + 1),
+ SQ_IDX_NVMAP_CTX = (SQ_IDX_NUM_HANDLES + 1),
+ SQ_IDX_HANDLES = (SQ_IDX_NVMAP_CTX + sizeof(void *)/4),
+};
+
struct sync_queue {
unsigned int read; /* read position within buffer */
unsigned int write; /* write position within buffer */
u32 *buffer; /* queue data */
};
+struct buffer_timeout {
+ struct delayed_work wq; /* work queue */
+ bool initialized; /* timer one-time setup flag */
+ u32 syncpt_id; /* buffer completion syncpt id */
+ u32 syncpt_val; /* syncpt value when completed */
+ ktime_t start_ktime; /* starting time */
+ /* context timeout information */
+ struct nvhost_userctx_timeout *ctx_timeout;
+};
+
enum cdma_event {
CDMA_EVENT_NONE, /* not waiting for any event */
CDMA_EVENT_SYNC_QUEUE_EMPTY, /* wait for empty sync queue */
@@ -74,11 +107,14 @@ struct nvhost_cdma {
enum cdma_event event; /* event that sem is waiting for */
unsigned int slots_used; /* pb slots used in current submit */
unsigned int slots_free; /* pb slots free in current submit */
+ unsigned int first_get; /* DMAGET value, where submit begins */
unsigned int last_put; /* last value written to DMAPUT */
struct push_buffer push_buffer; /* channel's push buffer */
+ struct syncpt_buffer syncpt_buffer; /* syncpt incr buffer */
struct sync_queue sync_queue; /* channel's sync queue */
+ struct buffer_timeout timeout; /* channel's timeout state/wq */
bool running;
-
+ bool torndown;
};
#define cdma_to_channel(cdma) container_of(cdma, struct nvhost_channel, cdma)
@@ -88,22 +124,27 @@ struct nvhost_cdma {
#define pb_to_cdma(pb) container_of(pb, struct nvhost_cdma, push_buffer)
#define cdma_pb_op(cdma) (cdma_to_dev(cdma)->op.push_buffer)
-
int nvhost_cdma_init(struct nvhost_cdma *cdma);
void nvhost_cdma_deinit(struct nvhost_cdma *cdma);
void nvhost_cdma_stop(struct nvhost_cdma *cdma);
-void nvhost_cdma_begin(struct nvhost_cdma *cdma);
+int nvhost_cdma_begin(struct nvhost_cdma *cdma,
+ struct nvhost_userctx_timeout *timeout);
void nvhost_cdma_push(struct nvhost_cdma *cdma, u32 op1, u32 op2);
void nvhost_cdma_push_gather(struct nvhost_cdma *cdma,
struct nvmap_handle *handle, u32 op1, u32 op2);
void nvhost_cdma_end(struct nvhost_cdma *cdma,
struct nvmap_client *user_nvmap,
u32 sync_point_id, u32 sync_point_value,
- struct nvmap_handle **handles, unsigned int nr_handles);
+ struct nvmap_handle **handles, unsigned int nr_handles,
+ struct nvhost_userctx_timeout *timeout);
void nvhost_cdma_update(struct nvhost_cdma *cdma);
void nvhost_cdma_flush(struct nvhost_cdma *cdma);
void nvhost_cdma_peek(struct nvhost_cdma *cdma,
u32 dmaget, int slot, u32 *out);
-
unsigned int nvhost_cdma_wait(struct nvhost_cdma *cdma, enum cdma_event event);
+void nvhost_cdma_start_timer(struct nvhost_cdma *cdma, u32 syncpt_id,
+ u32 syncpt_val,
+ struct nvhost_userctx_timeout *timeout);
+void nvhost_cdma_update_sync_queue(struct nvhost_cdma *cdma,
+ struct nvhost_syncpt *syncpt, struct device *dev);
#endif
diff --git a/drivers/video/tegra/host/nvhost_channel.c b/drivers/video/tegra/host/nvhost_channel.c
index ad8d403df0f7..d533ef3cd737 100644
--- a/drivers/video/tegra/host/nvhost_channel.c
+++ b/drivers/video/tegra/host/nvhost_channel.c
@@ -27,8 +27,6 @@
#include <linux/platform_device.h>
-
-
struct nvhost_channel *nvhost_getchannel(struct nvhost_channel *ch)
{
int err = 0;
diff --git a/drivers/video/tegra/host/nvhost_channel.h b/drivers/video/tegra/host/nvhost_channel.h
index c939a19d3fad..c35c6d0a010c 100644
--- a/drivers/video/tegra/host/nvhost_channel.h
+++ b/drivers/video/tegra/host/nvhost_channel.h
@@ -50,6 +50,8 @@ struct nvhost_channeldesc {
struct nvhost_channel {
int refcount;
+ int chid;
+ u32 syncpt_id;
struct mutex reflock;
struct mutex submitlock;
void __iomem *aperture;
@@ -90,6 +92,8 @@ int nvhost_channel_submit(
int nr_unpins,
u32 syncpt_id,
u32 syncpt_incrs,
+ u32 timeout,
+ void *timeout_ctx,
u32 *syncpt_value,
bool null_kickoff);
diff --git a/drivers/video/tegra/host/nvhost_hwctx.h b/drivers/video/tegra/host/nvhost_hwctx.h
index 06df90e58fb5..f128584e96f4 100644
--- a/drivers/video/tegra/host/nvhost_hwctx.h
+++ b/drivers/video/tegra/host/nvhost_hwctx.h
@@ -31,16 +31,19 @@
struct nvhost_channel;
struct nvhost_cdma;
+struct nvhost_userctx_timeout;
struct nvhost_hwctx {
struct kref ref;
struct nvhost_channel *channel;
+ struct nvhost_userctx_timeout *timeout;
bool valid;
struct nvmap_handle_ref *save;
u32 save_incrs;
u32 save_thresh;
+ u32 save_slots;
struct nvmap_handle_ref *restore;
u32 *restore_virt;
diff --git a/drivers/video/tegra/host/nvhost_intr.c b/drivers/video/tegra/host/nvhost_intr.c
index 753c18456198..30ef7d2b8bc6 100644
--- a/drivers/video/tegra/host/nvhost_intr.c
+++ b/drivers/video/tegra/host/nvhost_intr.c
@@ -145,7 +145,7 @@ static void action_ctxsave(struct nvhost_waitlist *waiter)
struct nvhost_hwctx *hwctx = waiter->data;
struct nvhost_channel *channel = hwctx->channel;
- if (channel->ctxhandler.save_service)
+ if (channel->ctxhandler.save_service && !hwctx->timeout->has_timedout)
channel->ctxhandler.save_service(hwctx);
channel->ctxhandler.put(hwctx);
}
diff --git a/drivers/video/tegra/host/nvhost_syncpt.c b/drivers/video/tegra/host/nvhost_syncpt.c
index 6236dedf5f88..3d2ec61e07a0 100644
--- a/drivers/video/tegra/host/nvhost_syncpt.c
+++ b/drivers/video/tegra/host/nvhost_syncpt.c
@@ -115,9 +115,14 @@ int nvhost_syncpt_wait_timeout(struct nvhost_syncpt *sp, u32 id,
if (value)
*value = 0;
+
BUG_ON(!syncpt_op(sp).update_min);
- if (!nvhost_syncpt_check_max(sp, id, thresh))
+ if (!nvhost_syncpt_check_max(sp, id, thresh)) {
+ WARN(1, "wait %d (%s) for (%d) wouldn't be met (max %d)\n",
+ id, syncpt_op(sp).name(sp, id), thresh,
+ nvhost_syncpt_read_max(sp, id));
return -EINVAL;
+ }
/* first check cache */
if (nvhost_syncpt_min_cmp(sp, id, thresh)) {
diff --git a/drivers/video/tegra/host/t20/3dctx_t20.c b/drivers/video/tegra/host/t20/3dctx_t20.c
index dadfbed3434a..7ad7166b2d3a 100644
--- a/drivers/video/tegra/host/t20/3dctx_t20.c
+++ b/drivers/video/tegra/host/t20/3dctx_t20.c
@@ -216,11 +216,12 @@ static void setup_restore_v0(u32 *ptr)
/*** save ***/
/* the same context save command sequence is used for all contexts. */
-static struct nvmap_handle_ref *save_buf = NULL;
-static phys_addr_t save_phys = 0;
-static unsigned int save_size = 0;
-static unsigned int save_incrs = 0;
-static unsigned int save_thresh = 0;
+static struct nvmap_handle_ref *save_buf;
+static phys_addr_t save_phys;
+static unsigned int save_size;
+static unsigned int save_incrs;
+static unsigned int save_thresh;
+static unsigned int save_slots;
static void __init setup_save_regs(const struct ctx_saver *saver,
struct save_info *info,
@@ -648,6 +649,7 @@ static struct nvhost_hwctx *ctx3d_alloc_common(struct nvhost_channel *ch,
ctx->save = save_buf;
ctx->save_incrs = save_incrs;
ctx->save_thresh = save_thresh;
+ ctx->save_slots = save_slots;
ctx->restore_phys = nvmap_pin(nvmap, ctx->restore);
ctx->restore_size = restore_size;
ctx->restore_incrs = restore_incrs;
@@ -769,6 +771,15 @@ int __init t20_nvhost_3dctx_handler_init(struct nvhost_hwctx_handler *h)
return err;
}
+ save_slots = 1; /* save_push_v0() */
+ if (s_is_v1) {
+ save_slots = 6; /* save_push_v1() */
+ if (register_sets == 2)
+ save_slots += 2;
+ if (s_war_insert_syncpoints)
+ save_slots += 1;
+ }
+
save_ptr = nvmap_mmap(save_buf);
if (!save_ptr) {
nvmap_free(nvmap, save_buf);
diff --git a/drivers/video/tegra/host/t20/cdma_t20.c b/drivers/video/tegra/host/t20/cdma_t20.c
index eaba1c78af92..69c3039357a8 100644
--- a/drivers/video/tegra/host/t20/cdma_t20.c
+++ b/drivers/video/tegra/host/t20/cdma_t20.c
@@ -25,6 +25,9 @@
#include "../dev.h"
#include "hardware_t20.h"
+#include "syncpt_t20.h"
+
+static void t20_cdma_timeout_handler(struct work_struct *work);
/*
* push_buffer
@@ -155,6 +158,266 @@ static u32 t20_push_buffer_putptr(struct push_buffer *pb)
return pb->phys + pb->cur;
}
+/*
+ * The syncpt incr buffer is filled with methods to increment syncpts, which
+ * is later GATHER-ed into the mainline PB. It's used when a timed out context
+ * is interleaved with other work, so needs to inline the syncpt increments
+ * to maintain the count (but otherwise does no work).
+ */
+
+/**
+ * Init timeout and syncpt incr buffer resources
+ */
+static int t20_cdma_timeout_init(struct nvhost_cdma *cdma,
+ u32 syncpt_id)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ struct nvmap_client *nvmap = cdma_to_nvmap(cdma);
+ struct syncpt_buffer *sb = &cdma->syncpt_buffer;
+ struct nvhost_channel *ch = cdma_to_channel(cdma);
+ u32 i = 0;
+
+ if (syncpt_id == NVSYNCPT_INVALID)
+ return -EINVAL;
+
+ /* allocate and map syncpt incr memory */
+ sb->mem = nvmap_alloc(nvmap,
+ (SYNCPT_INCR_BUFFER_SIZE_WORDS * sizeof(u32)), 32,
+ NVMAP_HANDLE_WRITE_COMBINE);
+ if (IS_ERR_OR_NULL(sb->mem)) {
+ sb->mem = NULL;
+ goto fail;
+ }
+ sb->mapped = nvmap_mmap(sb->mem);
+ if (sb->mapped == NULL)
+ goto fail;
+
+ /* pin syncpt buffer and get physical address */
+ sb->phys = nvmap_pin(nvmap, sb->mem);
+ if (sb->phys >= 0xfffff000) {
+ sb->phys = 0;
+ goto fail;
+ }
+
+ dev_dbg(&dev->pdev->dev, "%s: SYNCPT_INCR buffer at 0x%x\n",
+ __func__, sb->phys);
+
+ sb->words_per_incr = (syncpt_id == NVSYNCPT_3D) ? 5 : 3;
+ sb->incr_per_buffer = (SYNCPT_INCR_BUFFER_SIZE_WORDS /
+ sb->words_per_incr);
+
+ /* init buffer with SETCL and INCR_SYNCPT methods */
+ while (i < sb->incr_per_buffer) {
+ sb->mapped[i++] = nvhost_opcode_setclass(NV_HOST1X_CLASS_ID,
+ 0, 0);
+ sb->mapped[i++] = nvhost_opcode_imm_incr_syncpt(
+ NV_CLASS_HOST_SYNCPT_IMMEDIATE,
+ syncpt_id);
+ if (syncpt_id == NVSYNCPT_3D) {
+ /* also contains base increments */
+ sb->mapped[i++] = nvhost_opcode_nonincr(
+ NV_CLASS_HOST_INCR_SYNCPT_BASE,
+ 1);
+ sb->mapped[i++] = nvhost_class_host_incr_syncpt_base(
+ NVWAITBASE_3D, 1);
+ }
+ sb->mapped[i++] = nvhost_opcode_setclass(ch->desc->class,
+ 0, 0);
+ }
+ wmb();
+
+ INIT_DELAYED_WORK(&cdma->timeout.wq, t20_cdma_timeout_handler);
+ cdma->timeout.initialized = true;
+
+ return 0;
+fail:
+ cdma_op(cdma).timeout_destroy(cdma);
+ return -ENOMEM;
+}
+
+/**
+ * Clean up timeout syncpt buffer resources
+ */
+static void t20_cdma_timeout_destroy(struct nvhost_cdma *cdma)
+{
+ struct nvmap_client *nvmap = cdma_to_nvmap(cdma);
+ struct syncpt_buffer *sb = &cdma->syncpt_buffer;
+
+ if (sb->mapped)
+ nvmap_munmap(sb->mem, sb->mapped);
+
+ if (sb->phys != 0)
+ nvmap_unpin(nvmap, sb->mem);
+
+ if (sb->mem)
+ nvmap_free(nvmap, sb->mem);
+
+ sb->mem = NULL;
+ sb->mapped = NULL;
+ sb->phys = 0;
+
+ if (cdma->timeout.initialized)
+ cancel_delayed_work(&cdma->timeout.wq);
+ cdma->timeout.initialized = false;
+}
+
+/**
+ * Increment timedout buffer's syncpt via CPU.
+ */
+static void t20_cdma_timeout_cpu_incr(struct nvhost_cdma *cdma, u32 getptr,
+ u32 syncpt_incrs, u32 nr_slots)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ struct push_buffer *pb = &cdma->push_buffer;
+ u32 i, getidx;
+
+ for (i = 0; i < syncpt_incrs; i++)
+ nvhost_syncpt_cpu_incr(&dev->syncpt, cdma->timeout.syncpt_id);
+
+ /* after CPU incr, ensure shadow is up to date */
+ nvhost_syncpt_update_min(&dev->syncpt, cdma->timeout.syncpt_id);
+
+ /* update WAITBASE_3D by same number of incrs */
+ if (cdma->timeout.syncpt_id == NVSYNCPT_3D) {
+ void __iomem *p;
+ p = dev->sync_aperture + HOST1X_SYNC_SYNCPT_BASE_0 +
+ (NVWAITBASE_3D * sizeof(u32));
+ writel(readl(p) + syncpt_incrs, p);
+ }
+
+ /* NOP all the PB slots */
+ getidx = getptr - pb->phys;
+ while (nr_slots--) {
+ u32 *p = (u32 *)((u32)pb->mapped + getidx);
+ *(p++) = NVHOST_OPCODE_NOOP;
+ *(p++) = NVHOST_OPCODE_NOOP;
+ dev_dbg(&dev->pdev->dev, "%s: NOP at 0x%x\n",
+ __func__, pb->phys + getidx);
+ getidx = (getidx + 8) & (PUSH_BUFFER_SIZE - 1);
+ }
+ wmb();
+}
+
+/**
+ * This routine is called at the point we transition back into a timed
+ * ctx. The syncpts are incremented via pushbuffer with a flag indicating
+ * whether there's a CTXSAVE that should be still executed (for the
+ * preceding HW ctx).
+ */
+static void t20_cdma_timeout_pb_incr(struct nvhost_cdma *cdma, u32 getptr,
+ u32 syncpt_incrs, u32 nr_slots,
+ bool exec_ctxsave)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ struct syncpt_buffer *sb = &cdma->syncpt_buffer;
+ struct push_buffer *pb = &cdma->push_buffer;
+ struct nvhost_userctx_timeout *timeout = cdma->timeout.ctx_timeout;
+ u32 getidx, *p;
+
+ /* should have enough slots to incr to desired count */
+ BUG_ON(syncpt_incrs > (nr_slots * sb->incr_per_buffer));
+
+ getidx = getptr - pb->phys;
+ if (exec_ctxsave) {
+ /* don't disrupt the CTXSAVE of a good/non-timed out ctx */
+ nr_slots -= timeout->hwctx->save_slots;
+ syncpt_incrs -= timeout->hwctx->save_incrs;
+
+ getidx += (timeout->hwctx->save_slots * 8);
+ getidx &= (PUSH_BUFFER_SIZE - 1);
+
+ dev_dbg(&dev->pdev->dev,
+ "%s: exec CTXSAVE of prev ctx (slots %d, incrs %d)\n",
+ __func__, nr_slots, syncpt_incrs);
+ }
+
+ while (syncpt_incrs) {
+ u32 incrs, count;
+
+ /* GATHER count are incrs * number of DWORDs per incr */
+ incrs = min(syncpt_incrs, sb->incr_per_buffer);
+ count = incrs * sb->words_per_incr;
+
+ p = (u32 *)((u32)pb->mapped + getidx);
+ *(p++) = nvhost_opcode_gather(count);
+ *(p++) = sb->phys;
+
+ dev_dbg(&dev->pdev->dev,
+ "%s: GATHER at 0x%x, from 0x%x, dcount = %d\n",
+ __func__,
+ pb->phys + getidx, sb->phys,
+ (incrs * sb->words_per_incr));
+
+ syncpt_incrs -= incrs;
+ getidx = (getidx + 8) & (PUSH_BUFFER_SIZE - 1);
+ nr_slots--;
+ }
+
+ /* NOP remaining slots */
+ while (nr_slots--) {
+ p = (u32 *)((u32)pb->mapped + getidx);
+ *(p++) = NVHOST_OPCODE_NOOP;
+ *(p++) = NVHOST_OPCODE_NOOP;
+ dev_dbg(&dev->pdev->dev, "%s: NOP at 0x%x\n",
+ __func__, pb->phys + getidx);
+ getidx = (getidx + 8) & (PUSH_BUFFER_SIZE - 1);
+ }
+ wmb();
+}
+
+/**
+ * Clear a context switch save for a timed out context that's been
+ * queued up in a non-timed out context.
+ */
+static void t20_cdma_timeout_clear_ctxsave(struct nvhost_cdma *cdma,
+ u32 getptr, u32 nr_slots)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ struct syncpt_buffer *sb = &cdma->syncpt_buffer;
+ struct push_buffer *pb = &cdma->push_buffer;
+ struct nvhost_userctx_timeout *timeout = cdma->timeout.ctx_timeout;
+ u32 getidx, *p;
+
+ getidx = getptr - pb->phys;
+ p = (u32 *)((u32)pb->mapped + getidx);
+
+ if (timeout->hwctx) {
+ u32 incrs, slots_to_clear;
+
+ slots_to_clear = timeout->hwctx->save_slots;
+ incrs = timeout->hwctx->save_incrs;
+
+ BUG_ON(slots_to_clear > nr_slots);
+ BUG_ON(incrs > sb->incr_per_buffer);
+
+ dev_dbg(&dev->pdev->dev,
+ "%s: clearing CTXSAVE at 0x%x, for %d slots %d incrs\n",
+ __func__, pb->phys + getidx, slots_to_clear, incrs);
+
+ /* first, GATHER incr for ctxsave */
+ if (incrs) {
+ u32 count = incrs * sb->words_per_incr;
+
+ p = (u32 *)((u32)pb->mapped + getidx);
+ *(p++) = nvhost_opcode_gather(count);
+ *(p++) = sb->phys;
+
+ getidx = (getidx + 8) & (PUSH_BUFFER_SIZE - 1);
+ slots_to_clear--;
+ }
+
+ /* NOP remaining slots */
+ while (slots_to_clear--) {
+ p = (u32 *)((u32)pb->mapped + getidx);
+ *(p++) = NVHOST_OPCODE_NOOP;
+ *(p++) = NVHOST_OPCODE_NOOP;
+ dev_dbg(&dev->pdev->dev, "%s: NOP at 0x%x\n",
+ __func__, pb->phys + getidx);
+ getidx = (getidx + 8) & (PUSH_BUFFER_SIZE - 1);
+ }
+ }
+ wmb();
+}
/**
* Start channel DMA
@@ -167,7 +430,6 @@ static void t20_cdma_start(struct nvhost_cdma *cdma)
return;
BUG_ON(!cdma_pb_op(cdma).putptr);
-
cdma->last_put = cdma_pb_op(cdma).putptr(&cdma->push_buffer);
writel(nvhost_channel_dmactrl(true, false, false),
@@ -190,6 +452,53 @@ static void t20_cdma_start(struct nvhost_cdma *cdma)
}
/**
+ * Similar to t20_cdma_start(), but rather than starting from an idle
+ * state (where DMA GET is set to DMA PUT), on a timeout we restore
+ * DMA GET from an explicit value (so DMA may again be pending).
+ */
+static void t20_cdma_timeout_restart(struct nvhost_cdma *cdma, u32 getptr)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ void __iomem *chan_regs = cdma_to_channel(cdma)->aperture;
+
+ if (cdma->running)
+ return;
+
+ BUG_ON(!cdma_pb_op(cdma).putptr);
+ cdma->last_put = cdma_pb_op(cdma).putptr(&cdma->push_buffer);
+
+ writel(nvhost_channel_dmactrl(true, false, false),
+ chan_regs + HOST1X_CHANNEL_DMACTRL);
+
+ /* set base, end pointer (all of memory) */
+ writel(0, chan_regs + HOST1X_CHANNEL_DMASTART);
+ writel(0xFFFFFFFF, chan_regs + HOST1X_CHANNEL_DMAEND);
+
+ /* set GET, by loading the value in PUT (then reset GET) */
+ writel(getptr, chan_regs + HOST1X_CHANNEL_DMAPUT);
+ writel(nvhost_channel_dmactrl(true, true, true),
+ chan_regs + HOST1X_CHANNEL_DMACTRL);
+
+ dev_dbg(&dev->pdev->dev,
+ "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n",
+ __func__,
+ readl(chan_regs + HOST1X_CHANNEL_DMAGET),
+ readl(chan_regs + HOST1X_CHANNEL_DMAPUT),
+ cdma->last_put);
+
+ /* deassert GET reset and set PUT */
+ writel(nvhost_channel_dmactrl(true, false, false),
+ chan_regs + HOST1X_CHANNEL_DMACTRL);
+ writel(cdma->last_put, chan_regs + HOST1X_CHANNEL_DMAPUT);
+
+ /* start the command DMA */
+ writel(nvhost_channel_dmactrl(false, false, false),
+ chan_regs + HOST1X_CHANNEL_DMACTRL);
+
+ cdma->running = true;
+}
+
+/**
* Kick channel DMA into action by writing its PUT offset (if it has changed)
*/
static void t20_cdma_kick(struct nvhost_cdma *cdma)
@@ -235,12 +544,145 @@ void t20_cdma_peek(struct nvhost_cdma *cdma,
out[1] = p[offset + 1];
}
+/**
+ * Stops both channel's command processor and CDMA immediately.
+ * Also, tears down the channel and resets corresponding module.
+ */
+void t20_cdma_timeout_teardown_begin(struct nvhost_cdma *cdma)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ struct nvhost_channel *ch = cdma_to_channel(cdma);
+ u32 cmdproc_stop;
+
+ BUG_ON(cdma->torndown);
+
+ dev_dbg(&dev->pdev->dev,
+ "begin channel teardown (channel id %d)\n", ch->chid);
+
+ cmdproc_stop = readl(dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+ cmdproc_stop = nvhost_sync_cmdproc_stop_chid(cmdproc_stop, ch->chid);
+ writel(cmdproc_stop, dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+
+ dev_dbg(&dev->pdev->dev,
+ "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n",
+ __func__,
+ readl(ch->aperture + HOST1X_CHANNEL_DMAGET),
+ readl(ch->aperture + HOST1X_CHANNEL_DMAPUT),
+ cdma->last_put);
+
+ writel(nvhost_channel_dmactrl(true, false, false),
+ ch->aperture + HOST1X_CHANNEL_DMACTRL);
+
+ writel(BIT(ch->chid), dev->sync_aperture + HOST1X_SYNC_CH_TEARDOWN);
+ nvhost_module_reset(&ch->mod);
+
+ cdma->running = false;
+ cdma->torndown = true;
+}
+
+void t20_cdma_timeout_teardown_end(struct nvhost_cdma *cdma, u32 getptr)
+{
+ struct nvhost_master *dev = cdma_to_dev(cdma);
+ struct nvhost_channel *ch = cdma_to_channel(cdma);
+ u32 cmdproc_stop;
+
+ BUG_ON(!cdma->torndown || cdma->running);
+
+ dev_dbg(&dev->pdev->dev,
+ "end channel teardown (id %d, DMAGET restart = 0x%x)\n",
+ ch->chid, getptr);
+
+ cmdproc_stop = readl(dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+ cmdproc_stop = nvhost_sync_cmdproc_run_chid(cmdproc_stop, ch->chid);
+ writel(cmdproc_stop, dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+
+ cdma->torndown = false;
+ t20_cdma_timeout_restart(cdma, getptr);
+}
+
+/**
+ * If this timeout fires, it indicates the current sync_queue entry has
+ * exceeded its TTL and the userctx should be timed out and remaining
+ * submits already issued cleaned up (future submits return an error).
+ */
+static void t20_cdma_timeout_handler(struct work_struct *work)
+{
+ struct nvhost_cdma *cdma;
+ struct nvhost_master *dev;
+ struct nvhost_syncpt *sp;
+ struct nvhost_channel *ch;
+
+ u32 syncpt_val;
+
+ u32 prev_cmdproc, cmdproc_stop;
+
+ cdma = container_of(to_delayed_work(work), struct nvhost_cdma,
+ timeout.wq);
+ dev = cdma_to_dev(cdma);
+ sp = &dev->syncpt;
+ ch = cdma_to_channel(cdma);
+
+ mutex_lock(&cdma->lock);
+
+ if (!cdma->timeout.ctx_timeout) {
+ dev_dbg(&dev->pdev->dev,
+ "cdma_timeout: expired, but has NULL context\n");
+ mutex_unlock(&cdma->lock);
+ return;
+ }
+
+ /* stop processing to get a clean snapshot */
+ prev_cmdproc = readl(dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+ cmdproc_stop = nvhost_sync_cmdproc_stop_chid(prev_cmdproc, ch->chid);
+ writel(cmdproc_stop, dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+
+ dev_dbg(&dev->pdev->dev, "cdma_timeout: cmdproc was 0x%x is 0x%x\n",
+ prev_cmdproc, cmdproc_stop);
+
+ syncpt_val = nvhost_syncpt_update_min(&dev->syncpt,
+ cdma->timeout.syncpt_id);
+
+ /* has buffer actually completed? */
+ if ((s32)(syncpt_val - cdma->timeout.syncpt_val) >= 0) {
+ dev_dbg(&dev->pdev->dev,
+ "cdma_timeout: expired, but buffer had completed\n");
+ /* restore */
+ cmdproc_stop = nvhost_sync_cmdproc_run_chid(prev_cmdproc,
+ ch->chid);
+ writel(cmdproc_stop,
+ dev->sync_aperture + HOST1X_SYNC_CMDPROC_STOP);
+ mutex_unlock(&cdma->lock);
+ return;
+ }
+
+ dev_warn(&dev->pdev->dev,
+ "%s: timeout: %d (%s) ctx 0x%p, HW thresh %d, done %d\n",
+ __func__,
+ cdma->timeout.syncpt_id,
+ syncpt_op(sp).name(sp, cdma->timeout.syncpt_id),
+ cdma->timeout.ctx_timeout,
+ syncpt_val, cdma->timeout.syncpt_val);
+
+ /* stop HW, resetting channel/module */
+ cdma_op(cdma).timeout_teardown_begin(cdma);
+
+ nvhost_cdma_update_sync_queue(cdma, sp, &dev->pdev->dev);
+}
+
int nvhost_init_t20_cdma_support(struct nvhost_master *host)
{
host->op.cdma.start = t20_cdma_start;
host->op.cdma.stop = t20_cdma_stop;
host->op.cdma.kick = t20_cdma_kick;
+ host->op.cdma.timeout_init = t20_cdma_timeout_init;
+ host->op.cdma.timeout_destroy = t20_cdma_timeout_destroy;
+ host->op.cdma.timeout_teardown_begin = t20_cdma_timeout_teardown_begin;
+ host->op.cdma.timeout_teardown_end = t20_cdma_timeout_teardown_end;
+ host->op.cdma.timeout_cpu_incr = t20_cdma_timeout_cpu_incr;
+ host->op.cdma.timeout_pb_incr = t20_cdma_timeout_pb_incr;
+ host->op.cdma.timeout_clear_ctxsave = t20_cdma_timeout_clear_ctxsave;
+
host->sync_queue_size = NVHOST_SYNC_QUEUE_SIZE;
host->op.push_buffer.reset = t20_push_buffer_reset;
diff --git a/drivers/video/tegra/host/t20/channel_t20.c b/drivers/video/tegra/host/t20/channel_t20.c
index fdbf6ba7355d..b45c00421ec9 100644
--- a/drivers/video/tegra/host/t20/channel_t20.c
+++ b/drivers/video/tegra/host/t20/channel_t20.c
@@ -27,6 +27,7 @@
#include "hardware_t20.h"
#include "syncpt_t20.h"
+#include "../dev.h"
#define NVHOST_NUMCHANNELS (NV_HOST1X_CHANNELS - 1)
#define NVHOST_CHANNEL_BASE 0
@@ -42,10 +43,7 @@
#define NVMODMUTEX_DSI (9)
#define NV_FIFO_READ_TIMEOUT 200000
-static void power_2d(struct nvhost_module *mod, enum nvhost_power_action action);
static void power_3d(struct nvhost_module *mod, enum nvhost_power_action action);
-static void power_mpe(struct nvhost_module *mod, enum nvhost_power_action action);
-
static const struct nvhost_channeldesc channelmap[] = {
@@ -74,7 +72,6 @@ static const struct nvhost_channeldesc channelmap[] = {
.waitbases = BIT(NVWAITBASE_2D_0) | BIT(NVWAITBASE_2D_1),
.modulemutexes = BIT(NVMODMUTEX_2D_FULL) | BIT(NVMODMUTEX_2D_SIMPLE) |
BIT(NVMODMUTEX_2D_SB_A) | BIT(NVMODMUTEX_2D_SB_B),
- .power = power_2d,
},
{
/* channel 3 */
@@ -98,7 +95,6 @@ static const struct nvhost_channeldesc channelmap[] = {
BIT(NVSYNCPT_MPE_WR_SAFE),
.waitbases = BIT(NVWAITBASE_MPE),
.class = NV_VIDEO_ENCODE_MPEG_CLASS_ID,
- .power = power_mpe,
.exclusive = true,
.keepalive = true,
},
@@ -138,6 +134,7 @@ static int t20_channel_init(struct nvhost_channel *ch,
struct nvhost_master *dev, int index)
{
ch->dev = dev;
+ ch->chid = index;
ch->desc = channelmap + index;
mutex_init(&ch->reflock);
mutex_init(&ch->submitlock);
@@ -161,6 +158,7 @@ static int t20_channel_submit(struct nvhost_channel *channel,
int nr_unpins,
u32 syncpt_id,
u32 syncpt_incrs,
+ struct nvhost_userctx_timeout *timeout,
u32 *syncpt_value,
bool null_kickoff)
{
@@ -176,6 +174,9 @@ static int t20_channel_submit(struct nvhost_channel *channel,
if (strcmp(channel->mod.name, "gr3d") == 0)
module3d_notify_busy();
+ /* before error checks, return current max */
+ *syncpt_value = nvhost_syncpt_read_max(sp, syncpt_id);
+
/* get submit lock */
err = mutex_lock_interruptible(&channel->submitlock);
if (err) {
@@ -198,11 +199,26 @@ static int t20_channel_submit(struct nvhost_channel *channel,
}
}
+ /* begin a CDMA submit */
+ err = nvhost_cdma_begin(&channel->cdma, timeout);
+ if (err) {
+ mutex_unlock(&channel->submitlock);
+ nvhost_module_idle(&channel->mod);
+ return err;
+ }
+
/* context switch */
if (channel->cur_ctx != hwctx) {
trace_nvhost_channel_context_switch(channel->desc->name,
channel->cur_ctx, hwctx);
hwctx_to_save = channel->cur_ctx;
+ if (hwctx_to_save && hwctx_to_save->timeout &&
+ hwctx_to_save->timeout->has_timedout) {
+ hwctx_to_save = NULL;
+ dev_dbg(&channel->dev->pdev->dev,
+ "%s: skip save of timed out context (0x%p)\n",
+ __func__, channel->cur_ctx->timeout);
+ }
if (hwctx_to_save) {
syncpt_incrs += hwctx_to_save->save_incrs;
hwctx_to_save->valid = true;
@@ -223,9 +239,6 @@ static int t20_channel_submit(struct nvhost_channel *channel,
syncval = nvhost_syncpt_incr_max(sp,
syncpt_id, syncpt_incrs);
- /* begin a CDMA submit */
- nvhost_cdma_begin(&channel->cdma);
-
/* push save buffer (pre-gather setup depends on unit) */
if (hwctx_to_save)
channel->ctxhandler.save_push(&channel->cdma, hwctx_to_save);
@@ -281,7 +294,8 @@ static int t20_channel_submit(struct nvhost_channel *channel,
/* end CDMA submit & stash pinned hMems into sync queue */
nvhost_cdma_end(&channel->cdma, user_nvmap,
- syncpt_id, syncval, unpins, nr_unpins);
+ syncpt_id, syncval, unpins, nr_unpins,
+ timeout);
trace_nvhost_channel_submitted(channel->desc->name,
syncval-syncpt_incrs, syncval);
@@ -308,23 +322,16 @@ static int t20_channel_submit(struct nvhost_channel *channel,
return 0;
}
-static void power_2d(struct nvhost_module *mod, enum nvhost_power_action action)
-{
- /* TODO: [ahatala 2010-06-17] reimplement EPP hang war */
- if (action == NVHOST_POWER_ACTION_OFF) {
- /* TODO: [ahatala 2010-06-17] reset EPP */
- }
-}
-
static void power_3d(struct nvhost_module *mod, enum nvhost_power_action action)
{
struct nvhost_channel *ch = container_of(mod, struct nvhost_channel, mod);
struct nvhost_hwctx *hwctx_to_save;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
u32 syncpt_incrs, syncpt_val;
+ int err;
void *ref;
- if (action != NVHOST_POWER_ACTION_OFF)
+ if ((action != NVHOST_POWER_ACTION_OFF) || !mod->can_powergate)
return;
mutex_lock(&ch->submitlock);
@@ -337,6 +344,12 @@ static void power_3d(struct nvhost_module *mod, enum nvhost_power_action action)
if (strcmp(mod->name, "gr3d") == 0)
module3d_notify_busy();
+ err = nvhost_cdma_begin(&ch->cdma, hwctx_to_save->timeout);
+ if (err) {
+ mutex_unlock(&ch->submitlock);
+ return;
+ }
+
hwctx_to_save->valid = true;
ch->ctxhandler.get(hwctx_to_save);
ch->cur_ctx = NULL;
@@ -345,9 +358,9 @@ static void power_3d(struct nvhost_module *mod, enum nvhost_power_action action)
syncpt_val = nvhost_syncpt_incr_max(&ch->dev->syncpt,
NVSYNCPT_3D, syncpt_incrs);
- nvhost_cdma_begin(&ch->cdma);
ch->ctxhandler.save_push(&ch->cdma, hwctx_to_save);
- nvhost_cdma_end(&ch->cdma, ch->dev->nvmap, NVSYNCPT_3D, syncpt_val, NULL, 0);
+ nvhost_cdma_end(&ch->cdma, ch->dev->nvmap, NVSYNCPT_3D, syncpt_val,
+ NULL, 0, hwctx_to_save->timeout);
nvhost_intr_add_action(&ch->dev->intr, NVSYNCPT_3D,
syncpt_val - syncpt_incrs + hwctx_to_save->save_thresh,
@@ -366,13 +379,10 @@ static void power_3d(struct nvhost_module *mod, enum nvhost_power_action action)
mutex_unlock(&ch->submitlock);
}
-static void power_mpe(struct nvhost_module *mod, enum nvhost_power_action action)
-{
-}
-
static int t20_channel_read_3d_reg(
struct nvhost_channel *channel,
struct nvhost_hwctx *hwctx,
+ struct nvhost_userctx_timeout *timeout,
u32 offset,
u32 *value)
{
@@ -414,7 +424,7 @@ static int t20_channel_read_3d_reg(
NVSYNCPT_3D, syncpt_incrs);
/* begin a CDMA submit */
- nvhost_cdma_begin(&channel->cdma);
+ nvhost_cdma_begin(&channel->cdma, timeout);
/* push save buffer (pre-gather setup depends on unit) */
if (hwctx_to_save)
@@ -463,7 +473,8 @@ static int t20_channel_read_3d_reg(
/* end CDMA submit */
nvhost_cdma_end(&channel->cdma, channel->dev->nvmap,
- NVSYNCPT_3D, syncval, NULL, 0);
+ NVSYNCPT_3D, syncval, NULL, 0,
+ timeout);
/*
* schedule a context save interrupt (to drain the host FIFO
diff --git a/drivers/video/tegra/host/t20/hardware_t20.h b/drivers/video/tegra/host/t20/hardware_t20.h
index 1e68bdcde0fa..4245a44c6bc2 100644
--- a/drivers/video/tegra/host/t20/hardware_t20.h
+++ b/drivers/video/tegra/host/t20/hardware_t20.h
@@ -91,6 +91,8 @@ enum {
HOST1X_SYNC_SYNCPT_THRESH_CPU1_INT_STATUS = 0x48,
HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE = 0x60,
HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0 = 0x68,
+ HOST1X_SYNC_CMDPROC_STOP = 0xac,
+ HOST1X_SYNC_CH_TEARDOWN = 0xb0,
HOST1X_SYNC_USEC_CLK = 0x1a4,
HOST1X_SYNC_CTXSW_TIMEOUT_CFG = 0x1a8,
HOST1X_SYNC_IP_BUSY_TIMEOUT = 0x1bc,
@@ -129,6 +131,20 @@ static inline unsigned int nvhost_sync_mlock_owner_owner_chid(u32 reg)
return (reg >> 8) & 0xf;
}
+static inline unsigned int nvhost_sync_cmdproc_stop_chid(u32 reg, u32 chid)
+{
+ return reg | BIT(chid);
+}
+
+static inline unsigned int nvhost_sync_cmdproc_run_chid(u32 reg, u32 chid)
+{
+ return reg & ~(BIT(chid));
+}
+
+static inline unsigned int nvhost_sync_ch_teardown_chid(u32 reg, u32 chid)
+{
+ return reg | BIT(chid);
+}
/* host class methods */
enum {
@@ -271,4 +287,8 @@ int nvhost_drain_read_fifo(void __iomem *chan_regs,
/* 8 bytes per slot. (This number does not include the final RESTART.) */
#define PUSH_BUFFER_SIZE (NVHOST_GATHER_QUEUE_SIZE * 8)
+/* 4K page containing GATHERed methods to increment channel syncpts
+ * and replaces the original timed-out context's GATHER slots */
+#define SYNCPT_INCR_BUFFER_SIZE_WORDS (4096 / sizeof(u32))
+
#endif /* __NVHOST_HARDWARE_T20_H */