drm/i915: Record the tail at each request and use it to estimate the head

By recording the location of every request in the ringbuffer, we know that in order to retire the request the GPU must have finished reading it and so the GPU head is now beyond the tail of the request. We can therefore provide a conservative estimate of where the GPU is reading from in order to avoid having to read back the ring buffer registers when polling for space upon starting a new write into the ringbuffer. A secondary effect is that this allows us to convert intel_ring_buffer_wait() to use i915_wait_request() and so consolidate upon the single function to handle the complicated task of waiting upon the GPU. A necessary precaution is that we need to make that wait uninterruptible to match the existing conditions as all the callers of intel_ring_begin() have not been audited to handle ERESTARTSYS correctly. By using a conservative estimate for the head, and always processing all outstanding requests first, we prevent a race condition between using the estimate and direct reads of I915_RING_HEAD which could result in the value of the head going backwards, and the tail overflowing once again. We are also careful to mark any request that we skip over in order to free space in ring as consumed which provides a self-consistency check. Given sufficient abuse, such as a set of unthrottled GPU bound cairo-traces, avoiding the use of I915_RING_HEAD gives a 10-20% boost on Sandy Bridge (i5-2520m): firefox-paintball 18927ms -> 15646ms: 1.21x speedup firefox-fishtank 12563ms -> 11278ms: 1.11x speedup which is a mild consolation for the performance those traces achieved from exploiting the buggy autoreported head. v2: Add a few more comments and make request->tail a conservative estimate as suggested by Daniel Vetter. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: resolve conflicts with retirement defering and the lack of the autoreport head removal (that will go in through -fixes).] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
author: Chris Wilson <chris@chris-wilson.co.uk> 2012-02-15 11:25:36 +0000
committer: Daniel Vetter <daniel.vetter@ffwll.ch> 2012-02-15 14:26:03 +0100
commit: a71d8d94525e8fd855c0466fb586ae1cb008f3a2 (patch)
tree: 816d919b02d90a28b1c7b25d05deed73dca22774 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent: 7c26e5c6edaec70f12984f7a3020864cc21e6fec (diff)
1 files changed, 83 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e784ebb8cc27..ca3972f2c6f5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -583,6 +583,7 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 128;
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
+
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
@@ -1107,11 +1108,89 @@ static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
 	return 0;
 }
 
+static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	bool was_interruptible;
+	int ret;
+
+	/* XXX As we have not yet audited all the paths to check that
+	 * they are ready for ERESTARTSYS from intel_ring_begin, do not
+	 * allow us to be interruptible by a signal.
+	 */
+	was_interruptible = dev_priv->mm.interruptible;
+	dev_priv->mm.interruptible = false;
+
+	ret = i915_wait_request(ring, seqno, true);
+
+	dev_priv->mm.interruptible = was_interruptible;
+
+	return ret;
+}
+
+static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
+{
+	struct drm_i915_gem_request *request;
+	u32 seqno = 0;
+	int ret;
+
+	i915_gem_retire_requests_ring(ring);
+
+	if (ring->last_retired_head != -1) {
+		ring->head = ring->last_retired_head;
+		ring->last_retired_head = -1;
+		ring->space = ring_space(ring);
+		if (ring->space >= n)
+			return 0;
+	}
+
+	list_for_each_entry(request, &ring->request_list, list) {
+		int space;
+
+		if (request->tail == -1)
+			continue;
+
+		space = request->tail - (ring->tail + 8);
+		if (space < 0)
+			space += ring->size;
+		if (space >= n) {
+			seqno = request->seqno;
+			break;
+		}
+
+		/* Consume this request in case we need more space than
+		 * is available and so need to prevent a race between
+		 * updating last_retired_head and direct reads of
+		 * I915_RING_HEAD. It also provides a nice sanity check.
+		 */
+		request->tail = -1;
+	}
+
+	if (seqno == 0)
+		return -ENOSPC;
+
+	ret = intel_ring_wait_seqno(ring, seqno);
+	if (ret)
+		return ret;
+
+	if (WARN_ON(ring->last_retired_head == -1))
+		return -ENOSPC;
+
+	ring->head = ring->last_retired_head;
+	ring->last_retired_head = -1;
+	ring->space = ring_space(ring);
+	if (WARN_ON(ring->space < n))
+		return -ENOSPC;
+
+	return 0;
+}
+
 int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long end;
+	int ret;
 	u32 head;
 
 	/* If the reported head position has wrapped or hasn't advanced,
@@ -1125,6 +1204,10 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
 			return 0;
 	}
 
+	ret = intel_ring_wait_request(ring, n);
+	if (ret != -ENOSPC)
+		return ret;
+
 	trace_i915_ring_wait_begin(ring);
 	if (drm_core_check_feature(dev, DRIVER_GEM))
 		/* With GEM the hangcheck timer should kick us out of the loop,
author	Chris Wilson <chris@chris-wilson.co.uk>	2012-02-15 11:25:36 +0000
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2012-02-15 14:26:03 +0100
commit	a71d8d94525e8fd855c0466fb586ae1cb008f3a2 (patch)
tree	816d919b02d90a28b1c7b25d05deed73dca22774 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent	7c26e5c6edaec70f12984f7a3020864cc21e6fec (diff)