summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Terje Bergstrom <tbergstrom@nvidia.com> 2014-04-01 08:28:44 +0300
committer Winnie Hsu <whsu@nvidia.com> 2015-01-14 14:17:13 -0800
commit a6acd970218523768e21f7ebb86d5e75840f4cc5 (patch)
tree 6d491d65eda9d3e5b45fb8e46ce773d5a2d96bc6
parent c03e0d2b98aceeafdc7290a4a1da85550592a29e (diff)
gpu: nvgpu: Allow suppressing WFI on submit
Allow suppressing WFI when submitting work and requesting a fence back.

Bug 1491545

Change-Id: Ic3d061bb4f116cf7ea68dbd6a1b2ace9f11d0ab5
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/390457
Reviewed-on: http://git-master/r/671029
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sibashis Mohapatra <sibashism@nvidia.com>
Tested-by: Sibashis Mohapatra <sibashism@nvidia.com>
Reviewed-by: Yogesh Kini <ykini@nvidia.com>
Reviewed-by: Winnie Hsu <whsu@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 9
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 10
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 2
-rw-r--r-- include/linux/nvhost_ioctl.h | 2
4 files changed, 18 insertions, 5 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index f145b661241b..07137c28ea6e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1413,6 +1413,7 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
/* we might need two extra gpfifo entries - one for pre fence
* and one for post fence. */
const int extra_entries = 2;
+ bool need_wfi = !(flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
if (c->has_timedout)
return -ETIMEDOUT;
@@ -1505,10 +1506,12 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
err = c->sync->incr_user_fd(c->sync, &incr_cmd,
&c->last_submit_fence,
+ need_wfi,
&fence->syncpt_id);
else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
err = c->sync->incr_user_syncpt(c->sync, &incr_cmd,
&c->last_submit_fence,
+ need_wfi,
&fence->syncpt_id,
&fence->value);
else
@@ -1523,7 +1526,8 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
u64_hi32(wait_cmd->gva) |
pbdma_gp_entry1_length_f(wait_cmd->size);
- trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
+ trace_gk20a_push_cmdbuf(c->g->dev->name,
+ 0, wait_cmd->size, 0, wait_cmd->ptr);
c->gpfifo.put = (c->gpfifo.put + 1) &
(c->gpfifo.entry_num - 1);
@@ -1548,7 +1552,8 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
c->gpfifo.cpu_va[c->gpfifo.put].entry1 =
u64_hi32(incr_cmd->gva) |
pbdma_gp_entry1_length_f(incr_cmd->size);
- trace_write_pushbuffer(c, &c->gpfifo.cpu_va[c->gpfifo.put]);
+ trace_gk20a_push_cmdbuf(c->g->dev->name,
+ 0, incr_cmd->size, 0, incr_cmd->ptr);
c->gpfifo.put = (c->gpfifo.put + 1) &
(c->gpfifo.entry_num - 1);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index a8f57830f8ad..952087e5e07a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -261,6 +261,7 @@ int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s,
struct priv_cmd_entry **entry,
struct gk20a_channel_fence *fence,
+ bool wfi,
u32 *id, u32 *thresh)
{
struct gk20a_channel_syncpt *sp =
@@ -268,8 +269,10 @@ int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s,
/* Need to do 'host incr + wfi' or 'gfx incr' since we return the fence
* to user space. */
int err = __gk20a_channel_syncpt_incr(s,
- sp->c->obj_class == KEPLER_C /* use gfx class? */,
- sp->c->obj_class != KEPLER_C /* wfi if host class */,
+ wfi &&
+ sp->c->obj_class == KEPLER_C /* use gfx class? */,
+ wfi &&
+ sp->c->obj_class != KEPLER_C /* wfi if host class */,
true /* register irq */,
entry, fence);
if (err)
@@ -282,6 +285,7 @@ int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s,
int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s,
struct priv_cmd_entry **entry,
struct gk20a_channel_fence *fence,
+ bool wfi,
int *fd)
{
#ifdef CONFIG_SYNC
@@ -289,7 +293,7 @@ int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s,
struct nvhost_ctrl_sync_fence_info pt;
struct gk20a_channel_syncpt *sp =
container_of(s, struct gk20a_channel_syncpt, ops);
- err = gk20a_channel_syncpt_incr_user_syncpt(s, entry, fence,
+ err = gk20a_channel_syncpt_incr_user_syncpt(s, entry, fence, wfi,
&pt.id, &pt.thresh);
if (err)
return err;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 80f38b266089..90b61bfd7131 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -77,6 +77,7 @@ struct gk20a_channel_sync {
int (*incr_user_syncpt)(struct gk20a_channel_sync *s,
struct priv_cmd_entry **entry,
struct gk20a_channel_fence *fence,
+ bool wfi,
u32 *id, u32 *thresh);
/* Increment syncpoint/semaphore, so that the returned fence represents
@@ -89,6 +90,7 @@ struct gk20a_channel_sync {
int (*incr_user_fd)(struct gk20a_channel_sync *s,
struct priv_cmd_entry **entry,
struct gk20a_channel_fence *fence,
+ bool wfi,
int *fd);
/* Reset the channel syncpoint/semaphore. */
diff --git a/include/linux/nvhost_ioctl.h b/include/linux/nvhost_ioctl.h
index 4b6e1a2fb132..b060864ff1d1 100644
--- a/include/linux/nvhost_ioctl.h
+++ b/include/linux/nvhost_ioctl.h
@@ -143,6 +143,8 @@ struct nvhost_fence {
#define NVHOST_SUBMIT_GPFIFO_FLAGS_HW_FORMAT BIT(2)
/* create a sync fence fd instead of raw fence */
#define NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE BIT(3)
+/* suppress WFI before fence trigger */
+#define NVHOST_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI BIT(4)
struct nvhost_submit_gpfifo_args {
__u64 gpfifo;