summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem_execbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c227
1 files changed, 118 insertions, 109 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 60998fc4e5b2..1a0611bb576b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
#define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
#define BATCH_OFFSET_BIAS (256*1024)
@@ -94,7 +95,6 @@ eb_lookup_vmas(struct eb_vmas *eb,
struct i915_address_space *vm,
struct drm_file *file)
{
- struct drm_i915_private *dev_priv = vm->dev->dev_private;
struct drm_i915_gem_object *obj;
struct list_head objects;
int i, ret;
@@ -129,20 +129,6 @@ eb_lookup_vmas(struct eb_vmas *eb,
i = 0;
while (!list_empty(&objects)) {
struct i915_vma *vma;
- struct i915_address_space *bind_vm = vm;
-
- if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT &&
- USES_FULL_PPGTT(vm->dev)) {
- ret = -EINVAL;
- goto err;
- }
-
- /* If we have secure dispatch, or the userspace assures us that
- * they know what they're doing, use the GGTT VM.
- */
- if (((args->flags & I915_EXEC_SECURE) &&
- (i == (args->buffer_count - 1))))
- bind_vm = &dev_priv->gtt.base;
obj = list_first_entry(&objects,
struct drm_i915_gem_object,
@@ -156,7 +142,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
* from the (obj, vm) we don't run the risk of creating
* duplicated vmas for the same vm.
*/
- vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm);
+ vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
if (IS_ERR(vma)) {
DRM_DEBUG("Failed to lookup VMA\n");
ret = PTR_ERR(vma);
@@ -307,7 +293,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
uint64_t delta = reloc->delta + target_offset;
- uint32_t __iomem *reloc_entry;
+ uint64_t offset;
void __iomem *reloc_page;
int ret;
@@ -320,25 +306,24 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
return ret;
/* Map the page containing the relocation we're going to perform. */
- reloc->offset += i915_gem_obj_ggtt_offset(obj);
+ offset = i915_gem_obj_ggtt_offset(obj);
+ offset += reloc->offset;
reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
- reloc->offset & PAGE_MASK);
- reloc_entry = (uint32_t __iomem *)
- (reloc_page + offset_in_page(reloc->offset));
- iowrite32(lower_32_bits(delta), reloc_entry);
+ offset & PAGE_MASK);
+ iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
if (INTEL_INFO(dev)->gen >= 8) {
- reloc_entry += 1;
+ offset += sizeof(uint32_t);
- if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) {
+ if (offset_in_page(offset) == 0) {
io_mapping_unmap_atomic(reloc_page);
- reloc_page = io_mapping_map_atomic_wc(
- dev_priv->gtt.mappable,
- reloc->offset + sizeof(uint32_t));
- reloc_entry = reloc_page;
+ reloc_page =
+ io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
+ offset);
}
- iowrite32(upper_32_bits(delta), reloc_entry);
+ iowrite32(upper_32_bits(delta),
+ reloc_page + offset_in_page(offset));
}
io_mapping_unmap_atomic(reloc_page);
@@ -535,34 +520,18 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
}
static int
-need_reloc_mappable(struct i915_vma *vma)
-{
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
- i915_is_ggtt(vma->vm);
-}
-
-static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct intel_engine_cs *ring,
bool *need_reloc)
{
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
- bool need_fence;
uint64_t flags;
int ret;
flags = 0;
-
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- if (need_fence || need_reloc_mappable(vma))
+ if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
flags |= PIN_MAPPABLE;
-
if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
flags |= PIN_GLOBAL;
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
@@ -574,17 +543,13 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
entry->flags |= __EXEC_OBJECT_HAS_PIN;
- if (has_fenced_gpu_access) {
- if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
- ret = i915_gem_object_get_fence(obj);
- if (ret)
- return ret;
-
- if (i915_gem_object_pin_fence(obj))
- entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+ if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+ ret = i915_gem_object_get_fence(obj);
+ if (ret)
+ return ret;
- obj->pending_fenced_gpu_access = true;
- }
+ if (i915_gem_object_pin_fence(obj))
+ entry->flags |= __EXEC_OBJECT_HAS_FENCE;
}
if (entry->offset != vma->node.start) {
@@ -601,26 +566,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
}
static bool
-eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
+need_reloc_mappable(struct i915_vma *vma)
{
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- struct drm_i915_gem_object *obj = vma->obj;
- bool need_fence, need_mappable;
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- need_mappable = need_fence || need_reloc_mappable(vma);
+ if (entry->relocation_count == 0)
+ return false;
+
+ if (!i915_is_ggtt(vma->vm))
+ return false;
+
+ /* See also use_cpu_reloc() */
+ if (HAS_LLC(vma->obj->base.dev))
+ return false;
- WARN_ON((need_mappable || need_fence) &&
+ if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ return false;
+
+ return true;
+}
+
+static bool
+eb_vma_misplaced(struct i915_vma *vma)
+{
+ struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
!i915_is_ggtt(vma->vm));
if (entry->alignment &&
vma->node.start & (entry->alignment - 1))
return true;
- if (need_mappable && !obj->map_and_fenceable)
+ if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
return true;
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
@@ -642,9 +621,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
int retry;
- if (list_empty(vmas))
- return 0;
-
i915_gem_retire_requests_ring(ring);
vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
@@ -658,20 +634,21 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
obj = vma->obj;
entry = vma->exec_entry;
+ if (!has_fenced_gpu_access)
+ entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
need_fence =
- has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(vma);
- if (need_mappable)
+ if (need_mappable) {
+ entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
list_move(&vma->exec_list, &ordered_vmas);
- else
+ } else
list_move_tail(&vma->exec_list, &ordered_vmas);
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
obj->base.pending_write_domain = 0;
- obj->pending_fenced_gpu_access = false;
}
list_splice(&ordered_vmas, vmas);
@@ -696,7 +673,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
if (!drm_mm_node_allocated(&vma->node))
continue;
- if (eb_vma_misplaced(vma, has_fenced_gpu_access))
+ if (eb_vma_misplaced(vma))
ret = i915_vma_unbind(vma);
else
ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -744,9 +721,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
int i, total, ret;
unsigned count = args->buffer_count;
- if (WARN_ON(list_empty(&eb->vmas)))
- return 0;
-
vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
/* We may process another execbuffer during the unlock... */
@@ -890,18 +864,24 @@ i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
}
static int
-validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
+validate_exec_list(struct drm_device *dev,
+ struct drm_i915_gem_exec_object2 *exec,
int count)
{
- int i;
unsigned relocs_total = 0;
unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
+ unsigned invalid_flags;
+ int i;
+
+ invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
+ if (USES_FULL_PPGTT(dev))
+ invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
for (i = 0; i < count; i++) {
char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
int length; /* limited by fault_in_pages_readable() */
- if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
+ if (exec[i].flags & invalid_flags)
return -EINVAL;
/* First check for malicious input causing overflow in
@@ -951,16 +931,26 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
return ERR_PTR(-EIO);
}
+ if (i915.enable_execlists && !ctx->engine[ring->id].state) {
+ int ret = intel_lr_context_deferred_create(ctx, ring);
+ if (ret) {
+ DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
+ return ERR_PTR(ret);
+ }
+ }
+
return ctx;
}
-static void
+void
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
struct intel_engine_cs *ring)
{
+ u32 seqno = intel_ring_get_seqno(ring);
struct i915_vma *vma;
list_for_each_entry(vma, vmas, exec_list) {
+ struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
struct drm_i915_gem_object *obj = vma->obj;
u32 old_read = obj->base.read_domains;
u32 old_write = obj->base.write_domain;
@@ -969,24 +959,31 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
if (obj->base.write_domain == 0)
obj->base.pending_read_domains |= obj->base.read_domains;
obj->base.read_domains = obj->base.pending_read_domains;
- obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
i915_vma_move_to_active(vma, ring);
if (obj->base.write_domain) {
obj->dirty = 1;
- obj->last_write_seqno = intel_ring_get_seqno(ring);
+ obj->last_write_seqno = seqno;
intel_fb_obj_invalidate(obj, ring);
/* update for the implicit flush after a batch */
obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
}
+ if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+ obj->last_fenced_seqno = seqno;
+ if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+ struct drm_i915_private *dev_priv = to_i915(ring->dev);
+ list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
+ &dev_priv->mm.fence_list);
+ }
+ }
trace_i915_gem_object_change_domain(obj, old_read, old_write);
}
}
-static void
+void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
struct drm_file *file,
struct intel_engine_cs *ring,
@@ -1026,14 +1023,14 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
return 0;
}
-static int
-legacy_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
- struct intel_engine_cs *ring,
- struct intel_context *ctx,
- struct drm_i915_gem_execbuffer2 *args,
- struct list_head *vmas,
- struct drm_i915_gem_object *batch_obj,
- u64 exec_start, u32 flags)
+int
+i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
+ struct intel_engine_cs *ring,
+ struct intel_context *ctx,
+ struct drm_i915_gem_execbuffer2 *args,
+ struct list_head *vmas,
+ struct drm_i915_gem_object *batch_obj,
+ u64 exec_start, u32 flags)
{
struct drm_clip_rect *cliprects = NULL;
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1254,7 +1251,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (!i915_gem_check_execbuffer(args))
return -EINVAL;
- ret = validate_exec_list(exec, args->buffer_count);
+ ret = validate_exec_list(dev, exec, args->buffer_count);
if (ret)
return ret;
@@ -1318,8 +1315,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
i915_gem_context_reference(ctx);
- vm = ctx->vm;
- if (!USES_FULL_PPGTT(dev))
+ if (ctx->ppgtt)
+ vm = &ctx->ppgtt->base;
+ else
vm = &dev_priv->gtt.base;
eb = eb_create(args);
@@ -1386,25 +1384,36 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
- if (flags & I915_DISPATCH_SECURE &&
- !batch_obj->has_global_gtt_mapping) {
- /* When we have multiple VMs, we'll need to make sure that we
- * allocate space first */
- struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
- BUG_ON(!vma);
- vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
- }
+ if (flags & I915_DISPATCH_SECURE) {
+ /*
+ * So on first glance it looks freaky that we pin the batch here
+ * outside of the reservation loop. But:
+ * - The batch is already pinned into the relevant ppgtt, so we
+ * already have the backing storage fully allocated.
+ * - No other BO uses the global gtt (well contexts, but meh),
+ * so we don't really have issues with mutliple objects not
+ * fitting due to fragmentation.
+ * So this is actually safe.
+ */
+ ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+ if (ret)
+ goto err;
- if (flags & I915_DISPATCH_SECURE)
exec_start += i915_gem_obj_ggtt_offset(batch_obj);
- else
+ } else
exec_start += i915_gem_obj_offset(batch_obj, vm);
- ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
- args, &eb->vmas, batch_obj, exec_start, flags);
- if (ret)
- goto err;
+ ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
+ &eb->vmas, batch_obj, exec_start, flags);
+ /*
+ * FIXME: We crucially rely upon the active tracking for the (ppgtt)
+ * batch vma for correctness. For less ugly and less fragility this
+ * needs to be adjusted to also track the ggtt batch vma properly as
+ * active.
+ */
+ if (flags & I915_DISPATCH_SECURE)
+ i915_gem_object_ggtt_unpin(batch_obj);
err:
/* the request owns the ref now */
i915_gem_context_unreference(ctx);