summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/nouveau/nouveau_fence.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/nouveau/nouveau_fence.c')
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.c288
1 files changed, 191 insertions, 97 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index ab1bbfbf266e..4b9f4493c9f9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -27,6 +27,9 @@
#include "drmP.h"
#include "drm.h"
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+
#include "nouveau_drv.h"
#include "nouveau_ramht.h"
#include "nouveau_dma.h"
@@ -64,6 +67,7 @@ nouveau_fence_del(struct kref *ref)
struct nouveau_fence *fence =
container_of(ref, struct nouveau_fence, refcount);
+ nouveau_channel_ref(NULL, &fence->channel);
kfree(fence);
}
@@ -76,14 +80,17 @@ nouveau_fence_update(struct nouveau_channel *chan)
spin_lock(&chan->fence.lock);
- if (USE_REFCNT(dev))
- sequence = nvchan_rd32(chan, 0x48);
- else
- sequence = atomic_read(&chan->fence.last_sequence_irq);
+ /* Fetch the last sequence if the channel is still up and running */
+ if (likely(!list_empty(&chan->fence.pending))) {
+ if (USE_REFCNT(dev))
+ sequence = nvchan_rd32(chan, 0x48);
+ else
+ sequence = atomic_read(&chan->fence.last_sequence_irq);
- if (chan->fence.sequence_ack == sequence)
- goto out;
- chan->fence.sequence_ack = sequence;
+ if (chan->fence.sequence_ack == sequence)
+ goto out;
+ chan->fence.sequence_ack = sequence;
+ }
list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
sequence = fence->sequence;
@@ -113,13 +120,13 @@ nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence,
if (!fence)
return -ENOMEM;
kref_init(&fence->refcount);
- fence->channel = chan;
+ nouveau_channel_ref(chan, &fence->channel);
if (emit)
ret = nouveau_fence_emit(fence);
if (ret)
- nouveau_fence_unref((void *)&fence);
+ nouveau_fence_unref(&fence);
*pfence = fence;
return ret;
}
@@ -127,7 +134,7 @@ nouveau_fence_new(struct nouveau_channel *chan, struct nouveau_fence **pfence,
struct nouveau_channel *
nouveau_fence_channel(struct nouveau_fence *fence)
{
- return fence ? fence->channel : NULL;
+ return fence ? nouveau_channel_get_unlocked(fence->channel) : NULL;
}
int
@@ -135,6 +142,7 @@ nouveau_fence_emit(struct nouveau_fence *fence)
{
struct nouveau_channel *chan = fence->channel;
struct drm_device *dev = chan->dev;
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
int ret;
ret = RING_SPACE(chan, 2);
@@ -155,8 +163,15 @@ nouveau_fence_emit(struct nouveau_fence *fence)
list_add_tail(&fence->entry, &chan->fence.pending);
spin_unlock(&chan->fence.lock);
- BEGIN_RING(chan, NvSubSw, USE_REFCNT(dev) ? 0x0050 : 0x0150, 1);
- OUT_RING(chan, fence->sequence);
+ if (USE_REFCNT(dev)) {
+ if (dev_priv->card_type < NV_C0)
+ BEGIN_RING(chan, NvSubSw, 0x0050, 1);
+ else
+ BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0050, 1);
+ } else {
+ BEGIN_RING(chan, NvSubSw, 0x0150, 1);
+ }
+ OUT_RING (chan, fence->sequence);
FIRE_RING(chan);
return 0;
@@ -182,7 +197,7 @@ nouveau_fence_work(struct nouveau_fence *fence,
}
void
-nouveau_fence_unref(void **sync_obj)
+__nouveau_fence_unref(void **sync_obj)
{
struct nouveau_fence *fence = nouveau_fence(*sync_obj);
@@ -192,7 +207,7 @@ nouveau_fence_unref(void **sync_obj)
}
void *
-nouveau_fence_ref(void *sync_obj)
+__nouveau_fence_ref(void *sync_obj)
{
struct nouveau_fence *fence = nouveau_fence(sync_obj);
@@ -201,7 +216,7 @@ nouveau_fence_ref(void *sync_obj)
}
bool
-nouveau_fence_signalled(void *sync_obj, void *sync_arg)
+__nouveau_fence_signalled(void *sync_obj, void *sync_arg)
{
struct nouveau_fence *fence = nouveau_fence(sync_obj);
struct nouveau_channel *chan = fence->channel;
@@ -214,13 +229,15 @@ nouveau_fence_signalled(void *sync_obj, void *sync_arg)
}
int
-nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
+__nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
{
unsigned long timeout = jiffies + (3 * DRM_HZ);
+ unsigned long sleep_time = NSEC_PER_MSEC / 1000;
+ ktime_t t;
int ret = 0;
while (1) {
- if (nouveau_fence_signalled(sync_obj, sync_arg))
+ if (__nouveau_fence_signalled(sync_obj, sync_arg))
break;
if (time_after_eq(jiffies, timeout)) {
@@ -230,8 +247,13 @@ nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
__set_current_state(intr ? TASK_INTERRUPTIBLE
: TASK_UNINTERRUPTIBLE);
- if (lazy)
- schedule_timeout(1);
+ if (lazy) {
+ t = ktime_set(0, sleep_time);
+ schedule_hrtimeout(&t, HRTIMER_MODE_REL);
+ sleep_time *= 2;
+ if (sleep_time > NSEC_PER_MSEC)
+ sleep_time = NSEC_PER_MSEC;
+ }
if (intr && signal_pending(current)) {
ret = -ERESTARTSYS;
@@ -245,11 +267,12 @@ nouveau_fence_wait(void *sync_obj, void *sync_arg, bool lazy, bool intr)
}
static struct nouveau_semaphore *
-alloc_semaphore(struct drm_device *dev)
+semaphore_alloc(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
struct nouveau_semaphore *sema;
- int ret;
+ int size = (dev_priv->chipset < 0x84) ? 4 : 16;
+ int ret, i;
if (!USE_SEMA(dev))
return NULL;
@@ -263,9 +286,9 @@ alloc_semaphore(struct drm_device *dev)
goto fail;
spin_lock(&dev_priv->fence.lock);
- sema->mem = drm_mm_search_free(&dev_priv->fence.heap, 4, 0, 0);
+ sema->mem = drm_mm_search_free(&dev_priv->fence.heap, size, 0, 0);
if (sema->mem)
- sema->mem = drm_mm_get_block_atomic(sema->mem, 4, 0);
+ sema->mem = drm_mm_get_block_atomic(sema->mem, size, 0);
spin_unlock(&dev_priv->fence.lock);
if (!sema->mem)
@@ -273,7 +296,8 @@ alloc_semaphore(struct drm_device *dev)
kref_init(&sema->ref);
sema->dev = dev;
- nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 0);
+ for (i = sema->mem->start; i < sema->mem->start + size; i += 4)
+ nouveau_bo_wr32(dev_priv->fence.bo, i / 4, 0);
return sema;
fail:
@@ -282,7 +306,7 @@ fail:
}
static void
-free_semaphore(struct kref *ref)
+semaphore_free(struct kref *ref)
{
struct nouveau_semaphore *sema =
container_of(ref, struct nouveau_semaphore, ref);
@@ -304,61 +328,107 @@ semaphore_work(void *priv, bool signalled)
if (unlikely(!signalled))
nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1);
- kref_put(&sema->ref, free_semaphore);
+ kref_put(&sema->ref, semaphore_free);
}
static int
-emit_semaphore(struct nouveau_channel *chan, int method,
- struct nouveau_semaphore *sema)
+semaphore_acquire(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
{
- struct drm_nouveau_private *dev_priv = sema->dev->dev_private;
- struct nouveau_fence *fence;
- bool smart = (dev_priv->card_type >= NV_50);
+ struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+ struct nouveau_fence *fence = NULL;
int ret;
- ret = RING_SPACE(chan, smart ? 8 : 4);
+ if (dev_priv->chipset < 0x84) {
+ ret = RING_SPACE(chan, 3);
+ if (ret)
+ return ret;
+
+ BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 2);
+ OUT_RING (chan, sema->mem->start);
+ OUT_RING (chan, 1);
+ } else
+ if (dev_priv->chipset < 0xc0) {
+ struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
+ u64 offset = vma->offset + sema->mem->start;
+
+ ret = RING_SPACE(chan, 5);
+ if (ret)
+ return ret;
+
+ BEGIN_RING(chan, NvSubSw, 0x0010, 4);
+ OUT_RING (chan, upper_32_bits(offset));
+ OUT_RING (chan, lower_32_bits(offset));
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 1); /* ACQUIRE_EQ */
+ } else {
+ struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
+ u64 offset = vma->offset + sema->mem->start;
+
+ ret = RING_SPACE(chan, 5);
+ if (ret)
+ return ret;
+
+ BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0010, 4);
+ OUT_RING (chan, upper_32_bits(offset));
+ OUT_RING (chan, lower_32_bits(offset));
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0x1001); /* ACQUIRE_EQ */
+ }
+
+ /* Delay semaphore destruction until its work is done */
+ ret = nouveau_fence_new(chan, &fence, true);
if (ret)
return ret;
- if (smart) {
- BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
- OUT_RING(chan, NvSema);
- }
- BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
- OUT_RING(chan, sema->mem->start);
-
- if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) {
- /*
- * NV50 tries to be too smart and context-switch
- * between semaphores instead of doing a "first come,
- * first served" strategy like previous cards
- * do.
- *
- * That's bad because the ACQUIRE latency can get as
- * large as the PFIFO context time slice in the
- * typical DRI2 case where you have several
- * outstanding semaphores at the same moment.
- *
- * If we're going to ACQUIRE, force the card to
- * context switch before, just in case the matching
- * RELEASE is already scheduled to be executed in
- * another channel.
- */
- BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
- OUT_RING(chan, 0);
- }
+ kref_get(&sema->ref);
+ nouveau_fence_work(fence, semaphore_work, sema);
+ nouveau_fence_unref(&fence);
+ return 0;
+}
+
+static int
+semaphore_release(struct nouveau_channel *chan, struct nouveau_semaphore *sema)
+{
+ struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+ struct nouveau_fence *fence = NULL;
+ int ret;
+
+ if (dev_priv->chipset < 0x84) {
+ ret = RING_SPACE(chan, 4);
+ if (ret)
+ return ret;
+
+ BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1);
+ OUT_RING (chan, sema->mem->start);
+ BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_RELEASE, 1);
+ OUT_RING (chan, 1);
+ } else
+ if (dev_priv->chipset < 0xc0) {
+ struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
+ u64 offset = vma->offset + sema->mem->start;
+
+ ret = RING_SPACE(chan, 5);
+ if (ret)
+ return ret;
+
+ BEGIN_RING(chan, NvSubSw, 0x0010, 4);
+ OUT_RING (chan, upper_32_bits(offset));
+ OUT_RING (chan, lower_32_bits(offset));
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 2); /* RELEASE */
+ } else {
+ struct nouveau_vma *vma = &dev_priv->fence.bo->vma;
+ u64 offset = vma->offset + sema->mem->start;
- BEGIN_RING(chan, NvSubSw, method, 1);
- OUT_RING(chan, 1);
-
- if (smart && method == NV_SW_SEMAPHORE_RELEASE) {
- /*
- * Force the card to context switch, there may be
- * another channel waiting for the semaphore we just
- * released.
- */
- BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1);
- OUT_RING(chan, 0);
+ ret = RING_SPACE(chan, 5);
+ if (ret)
+ return ret;
+
+ BEGIN_NVC0(chan, 2, NvSubM2MF, 0x0010, 4);
+ OUT_RING (chan, upper_32_bits(offset));
+ OUT_RING (chan, lower_32_bits(offset));
+ OUT_RING (chan, 1);
+ OUT_RING (chan, 0x1002); /* RELEASE */
}
/* Delay semaphore destruction until its work is done */
@@ -368,8 +438,7 @@ emit_semaphore(struct nouveau_channel *chan, int method,
kref_get(&sema->ref);
nouveau_fence_work(fence, semaphore_work, sema);
- nouveau_fence_unref((void *)&fence);
-
+ nouveau_fence_unref(&fence);
return 0;
}
@@ -380,33 +449,49 @@ nouveau_fence_sync(struct nouveau_fence *fence,
struct nouveau_channel *chan = nouveau_fence_channel(fence);
struct drm_device *dev = wchan->dev;
struct nouveau_semaphore *sema;
- int ret;
+ int ret = 0;
- if (likely(!fence || chan == wchan ||
- nouveau_fence_signalled(fence, NULL)))
- return 0;
+ if (likely(!chan || chan == wchan ||
+ nouveau_fence_signalled(fence)))
+ goto out;
- sema = alloc_semaphore(dev);
+ sema = semaphore_alloc(dev);
if (!sema) {
/* Early card or broken userspace, fall back to
* software sync. */
- return nouveau_fence_wait(fence, NULL, false, false);
+ ret = nouveau_fence_wait(fence, true, false);
+ goto out;
+ }
+
+ /* try to take chan's mutex, if we can't take it right away
+ * we have to fallback to software sync to prevent locking
+ * order issues
+ */
+ if (!mutex_trylock(&chan->mutex)) {
+ ret = nouveau_fence_wait(fence, true, false);
+ goto out_unref;
}
/* Make wchan wait until it gets signalled */
- ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema);
+ ret = semaphore_acquire(wchan, sema);
if (ret)
- goto out;
+ goto out_unlock;
/* Signal the semaphore from chan */
- ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema);
+ ret = semaphore_release(chan, sema);
+
+out_unlock:
+ mutex_unlock(&chan->mutex);
+out_unref:
+ kref_put(&sema->ref, semaphore_free);
out:
- kref_put(&sema->ref, free_semaphore);
+ if (chan)
+ nouveau_channel_put_unlocked(&chan);
return ret;
}
int
-nouveau_fence_flush(void *sync_obj, void *sync_arg)
+__nouveau_fence_flush(void *sync_obj, void *sync_arg)
{
return 0;
}
@@ -419,16 +504,15 @@ nouveau_fence_channel_init(struct nouveau_channel *chan)
struct nouveau_gpuobj *obj = NULL;
int ret;
- /* Create an NV_SW object for various sync purposes */
- ret = nouveau_gpuobj_sw_new(chan, NV_SW, &obj);
- if (ret)
- return ret;
+ if (dev_priv->card_type >= NV_C0)
+ goto out_initialised;
- ret = nouveau_ramht_insert(chan, NvSw, obj);
- nouveau_gpuobj_ref(NULL, &obj);
+ /* Create an NV_SW object for various sync purposes */
+ ret = nouveau_gpuobj_gr_new(chan, NvSw, NV_SW);
if (ret)
return ret;
+ /* we leave subchannel empty for nvc0 */
ret = RING_SPACE(chan, 2);
if (ret)
return ret;
@@ -436,14 +520,13 @@ nouveau_fence_channel_init(struct nouveau_channel *chan)
OUT_RING(chan, NvSw);
/* Create a DMA object for the shared cross-channel sync area. */
- if (USE_SEMA(dev)) {
- struct drm_mm_node *mem = dev_priv->fence.bo->bo.mem.mm_node;
+ if (USE_SEMA(dev) && dev_priv->chipset < 0x84) {
+ struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem;
ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
mem->start << PAGE_SHIFT,
- mem->size << PAGE_SHIFT,
- NV_DMA_ACCESS_RW,
- NV_DMA_TARGET_VIDMEM, &obj);
+ mem->size, NV_MEM_ACCESS_RW,
+ NV_MEM_TARGET_VRAM, &obj);
if (ret)
return ret;
@@ -457,14 +540,20 @@ nouveau_fence_channel_init(struct nouveau_channel *chan)
return ret;
BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
OUT_RING(chan, NvSema);
+ } else {
+ ret = RING_SPACE(chan, 2);
+ if (ret)
+ return ret;
+ BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1);
+ OUT_RING (chan, chan->vram_handle); /* whole VM */
}
FIRE_RING(chan);
+out_initialised:
INIT_LIST_HEAD(&chan->fence.pending);
spin_lock_init(&chan->fence.lock);
atomic_set(&chan->fence.last_sequence_irq, 0);
-
return 0;
}
@@ -473,6 +562,8 @@ nouveau_fence_channel_fini(struct nouveau_channel *chan)
{
struct nouveau_fence *tmp, *fence;
+ spin_lock(&chan->fence.lock);
+
list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) {
fence->signalled = true;
list_del(&fence->entry);
@@ -482,18 +573,21 @@ nouveau_fence_channel_fini(struct nouveau_channel *chan)
kref_put(&fence->refcount, nouveau_fence_del);
}
+
+ spin_unlock(&chan->fence.lock);
}
int
nouveau_fence_init(struct drm_device *dev)
{
struct drm_nouveau_private *dev_priv = dev->dev_private;
+ int size = (dev_priv->chipset < 0x84) ? 4096 : 16384;
int ret;
/* Create a shared VRAM heap for cross-channel sync. */
if (USE_SEMA(dev)) {
- ret = nouveau_bo_new(dev, NULL, 4096, 0, TTM_PL_FLAG_VRAM,
- 0, 0, false, true, &dev_priv->fence.bo);
+ ret = nouveau_bo_new(dev, NULL, size, 0, TTM_PL_FLAG_VRAM,
+ 0, 0, &dev_priv->fence.bo);
if (ret)
return ret;