|
@@ -262,6 +262,25 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+/*
+ * gen7_render_ring_cs_stall_wa - emit a stalling PIPE_CONTROL on @ring
+ *
+ * Asks intel_ring_begin() for four dwords and, if that succeeds, writes a
+ * single 4-dword PIPE_CONTROL packet: the command header, a flags dword of
+ * CS_STALL | STALL_AT_SCOREBOARD, and two zero dwords. The packet is then
+ * handed over with intel_ring_advance().
+ *
+ * Returns 0 once the packet has been emitted, or the non-zero value from
+ * intel_ring_begin() (in which case nothing is emitted).
+ */
+static int
+gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
+{
+	const u32 stall_flags = PIPE_CONTROL_CS_STALL |
+				PIPE_CONTROL_STALL_AT_SCOREBOARD;
+	int err = intel_ring_begin(ring, 4);
+
+	if (err != 0)
+		return err;
+
+	/* Header, flags, then two unused dwords that must be written as 0. */
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+	intel_ring_emit(ring, stall_flags);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
+	intel_ring_advance(ring);
+
+	return 0;
+}
|
|
|
+
|
|
|
static int
|
|
|
gen7_render_ring_flush(struct intel_ring_buffer *ring,
|
|
|
u32 invalidate_domains, u32 flush_domains)
|
|
@@ -271,6 +290,16 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
|
|
|
u32 scratch_addr = pc->gtt_offset + 128;
|
|
|
int ret;
|
|
|
|
|
|
+ /*
|
|
|
+ * Ensure that any following seqno writes only happen when the render
|
|
|
+ * cache is indeed flushed.
|
|
|
+ *
|
|
|
+ * Workaround: 4th PIPE_CONTROL command (except the ones with only
|
|
|
+ * read-cache invalidate bits set) must have the CS_STALL bit set. We
|
|
|
+ * don't try to be clever and just set it unconditionally.
|
|
|
+ */
|
|
|
+ flags |= PIPE_CONTROL_CS_STALL;
|
|
|
+
|
|
|
/* Just flush everything. Experiments have shown that reducing the
|
|
|
* number of bits based on the write domains has little performance
|
|
|
* impact.
|
|
@@ -278,11 +307,6 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
|
|
|
if (flush_domains) {
|
|
|
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
|
|
|
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
|
|
|
- /*
|
|
|
- * Ensure that any following seqno writes only happen
|
|
|
- * when the render cache is indeed flushed.
|
|
|
- */
|
|
|
- flags |= PIPE_CONTROL_CS_STALL;
|
|
|
}
|
|
|
if (invalidate_domains) {
|
|
|
flags |= PIPE_CONTROL_TLB_INVALIDATE;
|
|
@@ -295,6 +319,11 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
|
|
|
* TLB invalidate requires a post-sync write.
|
|
|
*/
|
|
|
flags |= PIPE_CONTROL_QW_WRITE;
|
|
|
+
|
|
|
+ /* Workaround: we must issue a pipe_control with CS-stall bit
|
|
|
+ * set before a pipe_control command that has the state cache
|
|
|
+ * invalidate bit set. */
|
|
|
+ gen7_render_ring_cs_stall_wa(ring);
|
|
|
}
|
|
|
|
|
|
ret = intel_ring_begin(ring, 4);
|