summary refs log tree commit diff
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 191
1 files changed, 165 insertions, 26 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a087e1bf0c2f..d1cd8b89f47d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1763,8 +1763,11 @@ i915_add_request(struct intel_ring_buffer *ring,
ring->outstanding_lazy_request = false;
if (!dev_priv->mm.suspended) {
- mod_timer(&dev_priv->hangcheck_timer,
- jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+ if (i915_enable_hangcheck) {
+ mod_timer(&dev_priv->hangcheck_timer,
+ jiffies +
+ msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+ }
if (was_empty)
queue_delayed_work(dev_priv->wq,
&dev_priv->mm.retire_work, HZ);
@@ -2135,6 +2138,30 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
return 0;
}
+static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
+{
+ u32 old_write_domain, old_read_domains;
+ /* Take obj out of the GTT domain: barrier, release mmaps, clear GTT bits */
+ /* Act as a barrier for all accesses through the GTT */
+ mb();
+
+ /* Force a pagefault for domain tracking on next user access */
+ i915_gem_release_mmap(obj);
+
+ if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
+ return; /* GTT read bit already clear: domains need no update */
+
+ old_read_domains = obj->base.read_domains;
+ old_write_domain = obj->base.write_domain;
+ /* Clear the GTT bit from both masks; the old values go to the tracepoint */
+ obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
+ obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
+
+ trace_i915_gem_object_change_domain(obj,
+ old_read_domains,
+ old_write_domain);
+}
+
/**
* Unbinds an object from the GTT aperture.
*/
@@ -2151,23 +2178,28 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
return -EINVAL;
}
- /* blow away mappings if mapped through GTT */
- i915_gem_release_mmap(obj);
-
- /* Move the object to the CPU domain to ensure that
- * any possible CPU writes while it's not in the GTT
- * are flushed when we go to remap it. This will
- * also ensure that all pending GPU writes are finished
- * before we unbind.
- */
- ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ ret = i915_gem_object_finish_gpu(obj);
if (ret == -ERESTARTSYS)
return ret;
/* Continue on if we fail due to EIO, the GPU is hung so we
* should be safe and we need to cleanup or else we might
* cause memory corruption through use-after-free.
*/
+
+ i915_gem_object_finish_gtt(obj);
+
+ /* Move the object to the CPU domain to ensure that
+ * any possible CPU writes while it's not in the GTT
+ * are flushed when we go to remap it.
+ */
+ if (ret == 0)
+ ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+ if (ret == -ERESTARTSYS)
+ return ret;
if (ret) {
+ /* In the event of a disaster, abandon all caches and
+ * hope for the best.
+ */
i915_gem_clflush_object(obj);
obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}
@@ -2996,51 +3028,139 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
return 0;
}
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+ enum i915_cache_level cache_level)
+{
+ int ret;
+ /* Move obj to cache_level; returns 0, -EBUSY if pinned, or a callee's error */
+ if (obj->cache_level == cache_level)
+ return 0;
+
+ if (obj->pin_count) {
+ DRM_DEBUG("can not change the cache level of pinned objects\n");
+ return -EBUSY;
+ }
+ /* gtt_space set: finish GPU and GTT use, then call rebind with the new level */
+ if (obj->gtt_space) {
+ ret = i915_gem_object_finish_gpu(obj);
+ if (ret)
+ return ret;
+
+ i915_gem_object_finish_gtt(obj);
+
+ /* Before SandyBridge, you could not use tiling or fence
+ * registers with snooped memory, so relinquish any fences
+ * currently pointing to our region in the aperture.
+ */
+ if (INTEL_INFO(obj->base.dev)->gen < 6) {
+ ret = i915_gem_object_put_fence(obj);
+ if (ret)
+ return ret;
+ }
+
+ i915_gem_gtt_rebind_object(obj, cache_level);
+ }
+
+ if (cache_level == I915_CACHE_NONE) {
+ u32 old_read_domains, old_write_domain;
+
+ /* If we're coming from LLC cached, then we haven't
+ * actually been tracking whether the data is in the
+ * CPU cache or not, since we only allow one bit set
+ * in obj->write_domain and have been skipping the clflushes.
+ * Just mark it as being in the CPU domain for now.
+ */
+ WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
+ WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
+
+ old_read_domains = obj->base.read_domains;
+ old_write_domain = obj->base.write_domain;
+
+ obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+
+ trace_i915_gem_object_change_domain(obj,
+ old_read_domains,
+ old_write_domain);
+ }
+
+ obj->cache_level = cache_level;
+ return 0;
+}
+
/*
- * Prepare buffer for display plane. Use uninterruptible for possible flush
- * wait, as in modesetting process we're not supposed to be interrupted.
+ * Prepare buffer for display plane (scanout, cursors, etc).
+ * Can be called from an uninterruptible phase (modesetting) and allows
+ * any flushes to be pipelined (for pageflips).
+ *
+ * For the display plane, we want to be in the GTT but out of any write
+ * domains. So in many ways this looks like set_to_gtt_domain() apart from the
+ * ability to pipeline the waits, pinning and any additional subtleties
+ * that may differentiate the display plane from ordinary buffers.
*/
int
-i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+ u32 alignment,
struct intel_ring_buffer *pipelined)
{
- uint32_t old_read_domains;
+ u32 old_read_domains, old_write_domain;
int ret;
- /* Not valid to be called on unbound objects. */
- if (obj->gtt_space == NULL)
- return -EINVAL;
-
ret = i915_gem_object_flush_gpu_write_domain(obj);
if (ret)
return ret;
-
- /* Currently, we are always called from an non-interruptible context. */
if (pipelined != obj->ring) {
ret = i915_gem_object_wait_rendering(obj);
if (ret)
return ret;
}
+ /* The display engine is not coherent with the LLC cache on gen6. As
+ * a result, we make sure that the pinning that is about to occur is
+ * done with uncached PTEs. This is the lowest common denominator for all
+ * chipsets.
+ *
+ * However for gen6+, we could do better by using the GFDT bit instead
+ * of uncaching, which would allow us to flush all the LLC-cached data
+ * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+ */
+ ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+ if (ret)
+ return ret;
+
+ /* As the user may map the buffer once pinned in the display plane
+ * (e.g. libkms for the bootup splash), we have to ensure that we
+ * always use map_and_fenceable for all scanout buffers.
+ */
+ ret = i915_gem_object_pin(obj, alignment, true);
+ if (ret)
+ return ret;
+
i915_gem_object_flush_cpu_write_domain(obj);
+ old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains;
+
+ /* It should now be out of any other write domains, and we can update
+ * the domain values for our changes.
+ */
+ BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
trace_i915_gem_object_change_domain(obj,
old_read_domains,
- obj->base.write_domain);
+ old_write_domain);
return 0;
}
int
-i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
+i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
{
int ret;
- if (!obj->active)
+ if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
return 0;
if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
@@ -3049,6 +3169,9 @@ i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj)
return ret;
}
+ /* Ensure that we invalidate the GPU's caches and TLBs. */
+ obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
+
return i915_gem_object_wait_rendering(obj);
}
@@ -3575,7 +3698,23 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- obj->cache_level = I915_CACHE_NONE;
+ if (IS_GEN6(dev)) {
+ /* On Gen6, we can have the GPU use the LLC (the CPU
+ * cache) for about a 10% performance improvement
+ * compared to uncached. Graphics requests other than
+ * display scanout are coherent with the CPU in
+ * accessing this cache. This means in this mode we
+ * don't need to clflush on the CPU side, and on the
+ * GPU side we only need to flush internal caches to
+ * get data visible to the CPU.
+ *
+ * However, we maintain the display planes as UC, and so
+ * need to rebind when first used as such.
+ */
+ obj->cache_level = I915_CACHE_LLC;
+ } else
+ obj->cache_level = I915_CACHE_NONE;
+
obj->base.driver_private = NULL;
obj->fence_reg = I915_FENCE_REG_NONE;
INIT_LIST_HEAD(&obj->mm_list);