@@ -37,7 +37,8 @@
 #include <linux/dma-buf.h>
 
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
+static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+						   bool force);
 static __must_check int
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
@@ -67,6 +68,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev,
 	return HAS_LLC(dev) || level != I915_CACHE_NONE;
 }
 
+static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+		return true;
+
+	return obj->pin_display;
+}
+
 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
 {
 	if (obj->tiling_mode)
@@ -742,8 +751,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		 * write domain and manually flush cachelines (if required). This
 		 * optimizes for the case when the gpu will use the data
 		 * right away and we therefore have to clflush anyway. */
-		if (obj->cache_level == I915_CACHE_NONE)
-			needs_clflush_after = 1;
+		needs_clflush_after = cpu_write_needs_clflush(obj);
 		if (i915_gem_obj_bound_any(obj)) {
 			ret = i915_gem_object_set_to_gtt_domain(obj, true);
 			if (ret)
@@ -833,7 +841,7 @@ out:
 		 */
 		if (!needs_clflush_after &&
 		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
-			i915_gem_clflush_object(obj);
+			i915_gem_clflush_object(obj, obj->pin_display);
 			i915_gem_chipset_flush(dev);
 		}
 	}
@@ -911,9 +919,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	if (obj->cache_level == I915_CACHE_NONE &&
-	    obj->tiling_mode == I915_TILING_NONE &&
-	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+	if (obj->tiling_mode == I915_TILING_NONE &&
+	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
+	    cpu_write_needs_clflush(obj)) {
 		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
 		/* Note that the gtt paths might fail with non-page-backed user
 		 * pointers (e.g. gtt mappings when moving data between
@@ -1262,8 +1270,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 	}
 
 	/* Pinned buffers may be scanout, so flush the cache */
-	if (obj->pin_count)
-		i915_gem_object_flush_cpu_write_domain(obj);
+	if (obj->pin_display)
+		i915_gem_object_flush_cpu_write_domain(obj, true);
 
 	drm_gem_object_unreference(&obj->base);
 unlock:
@@ -1640,7 +1648,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 		 * hope for the best.
 		 */
 		WARN_ON(ret != -EIO);
-		i915_gem_clflush_object(obj);
+		i915_gem_clflush_object(obj, true);
 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	}
 
@@ -3217,7 +3225,8 @@ err_unpin:
 }
 
 void
-i915_gem_clflush_object(struct drm_i915_gem_object *obj)
+i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			bool force)
 {
 	/* If we don't have a page list set up, then we're not pinned
 	 * to GPU, and we can ignore the cache flush because it'll happen
@@ -3241,7 +3250,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
 	 * snooping behaviour occurs naturally as the result of our domain
 	 * tracking.
 	 */
-	if (obj->cache_level != I915_CACHE_NONE)
+	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
 		return;
 
 	trace_i915_gem_object_clflush(obj);
@@ -3278,14 +3287,15 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 
 /** Flushes the CPU write domain for the object if it's dirty. */
 static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
+i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+				       bool force)
 {
 	uint32_t old_write_domain;
 
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
 		return;
 
-	i915_gem_clflush_object(obj);
+	i915_gem_clflush_object(obj, force);
 	i915_gem_chipset_flush(obj->base.dev);
 	old_write_domain = obj->base.write_domain;
 	obj->base.write_domain = 0;
@@ -3319,7 +3329,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
-	i915_gem_object_flush_cpu_write_domain(obj);
+	i915_gem_object_flush_cpu_write_domain(obj, false);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -3409,7 +3419,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
					       obj, cache_level);
 	}
 
-	if (cache_level == I915_CACHE_NONE) {
+	list_for_each_entry(vma, &obj->vma_list, vma_link)
+		vma->node.color = cache_level;
+	obj->cache_level = cache_level;
+
+	if (cpu_write_needs_clflush(obj)) {
 		u32 old_read_domains, old_write_domain;
 
 		/* If we're coming from LLC cached, then we haven't
@@ -3432,9 +3446,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
					    old_write_domain);
 	}
 
-	list_for_each_entry(vma, &obj->vma_list, vma_link)
-		vma->node.color = cache_level;
-	obj->cache_level = cache_level;
 	i915_gem_verify_gtt(dev);
 	return 0;
 }
@@ -3562,7 +3573,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	if (ret)
 		goto err_unpin_display;
 
-	i915_gem_object_flush_cpu_write_domain(obj);
+	i915_gem_object_flush_cpu_write_domain(obj, true);
 
 	old_write_domain = obj->base.write_domain;
 	old_read_domains = obj->base.read_domains;
@@ -3634,8 +3645,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
-			i915_gem_clflush_object(obj);
+		i915_gem_clflush_object(obj, false);
 
 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
 	}
@@ -3817,10 +3827,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
 	obj->user_pin_count++;
 	obj->pin_filp = file;
 
-	/* XXX - flush the CPU caches for pinned objects
-	 * as the X server doesn't manage domains yet
-	 */
-	i915_gem_object_flush_cpu_write_domain(obj);
 	args->offset = i915_gem_obj_ggtt_offset(obj);
 out:
 	drm_gem_object_unreference(&obj->base);