@@ -34,180 +34,6 @@
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
 
-struct change_domains {
-	uint32_t invalidate_domains;
-	uint32_t flush_domains;
-	uint32_t flush_rings;
-	uint32_t flips;
-};
-
-/*
- * Set the next domain for the specified object. This
- * may not actually perform the necessary flushing/invaliding though,
- * as that may want to be batched with other set_domain operations
- *
- * This is (we hope) the only really tricky part of gem. The goal
- * is fairly simple -- track which caches hold bits of the object
- * and make sure they remain coherent. A few concrete examples may
- * help to explain how it works. For shorthand, we use the notation
- * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
- * a pair of read and write domain masks.
- *
- * Case 1: the batch buffer
- *
- *	1. Allocated
- *	2. Written by CPU
- *	3. Mapped to GTT
- *	4. Read by GPU
- *	5. Unmapped from GTT
- *	6. Freed
- *
- *	Let's take these a step at a time
- *
- *	1. Allocated
- *		Pages allocated from the kernel may still have
- *		cache contents, so we set them to (CPU, CPU) always.
- *	2. Written by CPU (using pwrite)
- *		The pwrite function calls set_domain (CPU, CPU) and
- *		this function does nothing (as nothing changes)
- *	3. Mapped by GTT
- *		This function asserts that the object is not
- *		currently in any GPU-based read or write domains
- *	4. Read by GPU
- *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
- *		As write_domain is zero, this function adds in the
- *		current read domains (CPU+COMMAND, 0).
- *		flush_domains is set to CPU.
- *		invalidate_domains is set to COMMAND
- *		clflush is run to get data out of the CPU caches
- *		then i915_dev_set_domain calls i915_gem_flush to
- *		emit an MI_FLUSH and drm_agp_chipset_flush
- *	5. Unmapped from GTT
- *		i915_gem_object_unbind calls set_domain (CPU, CPU)
- *		flush_domains and invalidate_domains end up both zero
- *		so no flushing/invalidating happens
- *	6. Freed
- *		yay, done
- *
- * Case 2: The shared render buffer
- *
- *	1. Allocated
- *	2. Mapped to GTT
- *	3. Read/written by GPU
- *	4. set_domain to (CPU,CPU)
- *	5. Read/written by CPU
- *	6. Read/written by GPU
- *
- *	1. Allocated
- *		Same as last example, (CPU, CPU)
- *	2. Mapped to GTT
- *		Nothing changes (assertions find that it is not in the GPU)
- *	3. Read/written by GPU
- *		execbuffer calls set_domain (RENDER, RENDER)
- *		flush_domains gets CPU
- *		invalidate_domains gets GPU
- *		clflush (obj)
- *		MI_FLUSH and drm_agp_chipset_flush
- *	4. set_domain (CPU, CPU)
- *		flush_domains gets GPU
- *		invalidate_domains gets CPU
- *		wait_rendering (obj) to make sure all drawing is complete.
- *		This will include an MI_FLUSH to get the data from GPU
- *		to memory
- *		clflush (obj) to invalidate the CPU cache
- *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
- *	5. Read/written by CPU
- *		cache lines are loaded and dirtied
- *	6. Read written by GPU
- *		Same as last GPU access
- *
- * Case 3: The constant buffer
- *
- *	1. Allocated
- *	2. Written by CPU
- *	3. Read by GPU
- *	4. Updated (written) by CPU again
- *	5. Read by GPU
- *
- *	1. Allocated
- *		(CPU, CPU)
- *	2. Written by CPU
- *		(CPU, CPU)
- *	3. Read by GPU
- *		(CPU+RENDER, 0)
- *		flush_domains = CPU
- *		invalidate_domains = RENDER
- *		clflush (obj)
- *		MI_FLUSH
- *		drm_agp_chipset_flush
- *	4. Updated (written) by CPU again
- *		(CPU, CPU)
- *		flush_domains = 0 (no previous write domain)
- *		invalidate_domains = 0 (no new read domains)
- *	5. Read by GPU
- *		(CPU+RENDER, 0)
- *		flush_domains = CPU
- *		invalidate_domains = RENDER
- *		clflush (obj)
- *		MI_FLUSH
- *		drm_agp_chipset_flush
- */
-static void
-i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
-				  struct intel_ring_buffer *ring,
-				  struct change_domains *cd)
-{
-	uint32_t invalidate_domains = 0, flush_domains = 0;
-
-	/*
-	 * If the object isn't moving to a new write domain,
-	 * let the object stay in multiple read domains
-	 */
-	if (obj->base.pending_write_domain == 0)
-		obj->base.pending_read_domains |= obj->base.read_domains;
-
-	/*
-	 * Flush the current write domain if
-	 * the new read domains don't match. Invalidate
-	 * any read domains which differ from the old
-	 * write domain
-	 */
-	if (obj->base.write_domain &&
-	    (((obj->base.write_domain != obj->base.pending_read_domains ||
-	       obj->ring != ring)) ||
-	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
-		flush_domains |= obj->base.write_domain;
-		invalidate_domains |=
-			obj->base.pending_read_domains & ~obj->base.write_domain;
-	}
-	/*
-	 * Invalidate any read caches which may have
-	 * stale data. That is, any new read domains.
-	 */
-	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
-	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
-		i915_gem_clflush_object(obj);
-
-	if (obj->base.pending_write_domain)
-		cd->flips |= atomic_read(&obj->pending_flip);
-
-	/* The actual obj->write_domain will be updated with
-	 * pending_write_domain after we emit the accumulated flush for all
-	 * of our domain changes in execbuffers (which clears objects'
-	 * write_domains). So if we have a current write domain that we
-	 * aren't changing, set pending_write_domain to that.
-	 */
-	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
-		obj->base.pending_write_domain = obj->base.write_domain;
-
-	cd->invalidate_domains |= invalidate_domains;
-	cd->flush_domains |= flush_domains;
-	if (flush_domains & I915_GEM_GPU_DOMAINS)
-		cd->flush_rings |= intel_ring_flag(obj->ring);
-	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-		cd->flush_rings |= intel_ring_flag(ring);
-}
-
struct eb_objects {
 	int and;
 	struct hlist_head buckets[0];
@@ -810,18 +636,6 @@ err:
 	return ret;
 }
 
-static void
-i915_gem_execbuffer_flush(struct drm_device *dev,
-			  uint32_t invalidate_domains,
-			  uint32_t flush_domains)
-{
-	if (flush_domains & I915_GEM_DOMAIN_CPU)
-		intel_gtt_chipset_flush();
-
-	if (flush_domains & I915_GEM_DOMAIN_GTT)
-		wmb();
-}
-
static int
 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
 {
@@ -854,37 +668,41 @@ i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
 	return 0;
 }
 
-
 static int
 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 				struct list_head *objects)
 {
 	struct drm_i915_gem_object *obj;
-	struct change_domains cd;
+	uint32_t flush_domains = 0;
+	uint32_t flips = 0;
 	int ret;
 
-	memset(&cd, 0, sizeof(cd));
-	list_for_each_entry(obj, objects, exec_list)
-		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
-
-	if (cd.invalidate_domains | cd.flush_domains) {
-		i915_gem_execbuffer_flush(ring->dev,
-					  cd.invalidate_domains,
-					  cd.flush_domains);
-	}
-
-	if (cd.flips) {
-		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
+	list_for_each_entry(obj, objects, exec_list) {
+		ret = i915_gem_object_sync(obj, ring);
 		if (ret)
 			return ret;
+
+		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+			i915_gem_clflush_object(obj);
+
+		if (obj->base.pending_write_domain)
+			flips |= atomic_read(&obj->pending_flip);
+
+		flush_domains |= obj->base.write_domain;
 	}
 
-	list_for_each_entry(obj, objects, exec_list) {
-		ret = i915_gem_object_sync(obj, ring);
+	if (flips) {
+		ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
 		if (ret)
 			return ret;
 	}
 
+	if (flush_domains & I915_GEM_DOMAIN_CPU)
+		intel_gtt_chipset_flush();
+
+	if (flush_domains & I915_GEM_DOMAIN_GTT)
+		wmb();
+
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
 	 */