
Merge branch 'drm-ttm-next' into drm-core-next

* drm-ttm-next:
  drm/radeon: Use the ttm execbuf utilities
  drm/ttm: Fix up io_mem_reserve / io_mem_free calling
  drm/ttm/vmwgfx: Have TTM manage the validation sequence.
  drm/ttm: Improved fencing of buffer object lists
  drm/ttm/radeon/nouveau: Kill the bo lock in favour of a bo device fence_lock
  drm/ttm: Don't deadlock on recursive multi-bo reservations
  drm/ttm: Optimize ttm_eu_backoff_reservation
  drm/ttm: Use kref_sub instead of repeatedly calling kref_put
  kref: Add a kref_sub function
  drm/ttm: Add a bo list reserve fastpath (v2)
Dave Airlie, 14 years ago
parent
commit a9979d6077
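
Before the per-file diffs, a minimal sketch of the driver-facing flow this series converges on: buffers are queued on a ttm_validate_buffer list, reserved and fenced through the ttm_execbuf_util helpers (the explicit val_seq argument is gone; the sequence now comes from the bo device), and fence access is serialized by the per-device fence_lock rather than the removed per-bo lock. The entry point my_driver_submit and the fence pointer are hypothetical driver-side names; the ttm_eu_* calls are the ones reworked in the hunks below.

#include <ttm/ttm_bo_driver.h>
#include <ttm/ttm_execbuf_util.h>

/* Hypothetical driver submission path using the reworked helpers. */
static int my_driver_submit(struct list_head *validate_list, void *fence)
{
	int ret;

	/* Reserve every bo on the list; the sequence number is now
	 * taken from the bo device (bdev->val_seq) inside TTM. */
	ret = ttm_eu_reserve_buffers(validate_list);
	if (unlikely(ret != 0))
		return ret;

	/*
	 * ... validate placements and emit the command stream here; on
	 * failure, ttm_eu_backoff_reservation(validate_list) undoes the
	 * reservations without attaching a fence ...
	 */

	/* On success, attach the fence and unreserve in one call. */
	ttm_eu_fence_buffer_objects(validate_list, fence);
	return 0;
}
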

+ 6 - 6
drivers/gpu/drm/nouveau/nouveau_gem.c

@@ -234,10 +234,10 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence)
 		if (likely(fence)) {
 			struct nouveau_fence *prev_fence;
 
-			spin_lock(&nvbo->bo.lock);
+			spin_lock(&nvbo->bo.bdev->fence_lock);
 			prev_fence = nvbo->bo.sync_obj;
 			nvbo->bo.sync_obj = nouveau_fence_ref(fence);
-			spin_unlock(&nvbo->bo.lock);
+			spin_unlock(&nvbo->bo.bdev->fence_lock);
 			nouveau_fence_unref((void *)&prev_fence);
 		}
 
@@ -557,9 +557,9 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev,
 				data |= r->vor;
 		}
 
-		spin_lock(&nvbo->bo.lock);
+		spin_lock(&nvbo->bo.bdev->fence_lock);
 		ret = ttm_bo_wait(&nvbo->bo, false, false, false);
-		spin_unlock(&nvbo->bo.lock);
+		spin_unlock(&nvbo->bo.bdev->fence_lock);
 		if (ret) {
 			NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret);
 			break;
@@ -791,9 +791,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
 	}
 
 	if (req->flags & NOUVEAU_GEM_CPU_PREP_NOBLOCK) {
-		spin_lock(&nvbo->bo.lock);
+		spin_lock(&nvbo->bo.bdev->fence_lock);
 		ret = ttm_bo_wait(&nvbo->bo, false, false, no_wait);
-		spin_unlock(&nvbo->bo.lock);
+		spin_unlock(&nvbo->bo.bdev->fence_lock);
 	} else {
 		ret = ttm_bo_synccpu_write_grab(&nvbo->bo, no_wait);
 		if (ret == 0)

+ 2 - 2
drivers/gpu/drm/radeon/radeon.h

@@ -69,6 +69,7 @@
 #include <ttm/ttm_bo_driver.h>
 #include <ttm/ttm_placement.h>
 #include <ttm/ttm_module.h>
+#include <ttm/ttm_execbuf_util.h>
 
 #include "radeon_family.h"
 #include "radeon_mode.h"
@@ -259,13 +260,12 @@ struct radeon_bo {
 };
 
 struct radeon_bo_list {
-	struct list_head	list;
+	struct ttm_validate_buffer tv;
 	struct radeon_bo	*bo;
 	uint64_t		gpu_offset;
 	unsigned		rdomain;
 	unsigned		wdomain;
 	u32			tiling_flags;
-	bool			reserved;
 };
 
 /*

+ 10 - 7
drivers/gpu/drm/radeon/radeon_cs.c

@@ -77,13 +77,13 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			p->relocs_ptr[i] = &p->relocs[i];
 			p->relocs[i].robj = p->relocs[i].gobj->driver_private;
 			p->relocs[i].lobj.bo = p->relocs[i].robj;
-			p->relocs[i].lobj.rdomain = r->read_domains;
 			p->relocs[i].lobj.wdomain = r->write_domain;
+			p->relocs[i].lobj.rdomain = r->read_domains;
+			p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
 			p->relocs[i].handle = r->handle;
 			p->relocs[i].flags = r->flags;
-			INIT_LIST_HEAD(&p->relocs[i].lobj.list);
 			radeon_bo_list_add_object(&p->relocs[i].lobj,
-						&p->validated);
+						  &p->validated);
 		}
 	}
 	return radeon_bo_list_validate(&p->validated);
@@ -189,10 +189,13 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
 {
 	unsigned i;
 
-	if (!error && parser->ib) {
-		radeon_bo_list_fence(&parser->validated, parser->ib->fence);
-	}
-	radeon_bo_list_unreserve(&parser->validated);
+
+	if (!error && parser->ib)
+		ttm_eu_fence_buffer_objects(&parser->validated,
+					    parser->ib->fence);
+	else
+		ttm_eu_backoff_reservation(&parser->validated);
+
 	if (parser->relocs != NULL) {
 		for (i = 0; i < parser->nrelocs; i++) {
 			if (parser->relocs[i].gobj)

+ 4 - 51
drivers/gpu/drm/radeon/radeon_object.c

@@ -293,34 +293,9 @@ void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
 				struct list_head *head)
 {
 	if (lobj->wdomain) {
-		list_add(&lobj->list, head);
+		list_add(&lobj->tv.head, head);
 	} else {
-		list_add_tail(&lobj->list, head);
-	}
-}
-
-int radeon_bo_list_reserve(struct list_head *head)
-{
-	struct radeon_bo_list *lobj;
-	int r;
-
-	list_for_each_entry(lobj, head, list){
-		r = radeon_bo_reserve(lobj->bo, false);
-		if (unlikely(r != 0))
-			return r;
-		lobj->reserved = true;
-	}
-	return 0;
-}
-
-void radeon_bo_list_unreserve(struct list_head *head)
-{
-	struct radeon_bo_list *lobj;
-
-	list_for_each_entry(lobj, head, list) {
-		/* only unreserve object we successfully reserved */
-		if (lobj->reserved && radeon_bo_is_reserved(lobj->bo))
-			radeon_bo_unreserve(lobj->bo);
+		list_add_tail(&lobj->tv.head, head);
 	}
 }
 
@@ -331,14 +306,11 @@ int radeon_bo_list_validate(struct list_head *head)
 	u32 domain;
 	int r;
 
-	list_for_each_entry(lobj, head, list) {
-		lobj->reserved = false;
-	}
-	r = radeon_bo_list_reserve(head);
+	r = ttm_eu_reserve_buffers(head);
 	if (unlikely(r != 0)) {
 		return r;
 	}
-	list_for_each_entry(lobj, head, list) {
+	list_for_each_entry(lobj, head, tv.head) {
 		bo = lobj->bo;
 		if (!bo->pin_count) {
 			domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain;
@@ -361,25 +333,6 @@ int radeon_bo_list_validate(struct list_head *head)
 	return 0;
 }
 
-void radeon_bo_list_fence(struct list_head *head, void *fence)
-{
-	struct radeon_bo_list *lobj;
-	struct radeon_bo *bo;
-	struct radeon_fence *old_fence = NULL;
-
-	list_for_each_entry(lobj, head, list) {
-		bo = lobj->bo;
-		spin_lock(&bo->tbo.lock);
-		old_fence = (struct radeon_fence *)bo->tbo.sync_obj;
-		bo->tbo.sync_obj = radeon_fence_ref(fence);
-		bo->tbo.sync_obj_arg = NULL;
-		spin_unlock(&bo->tbo.lock);
-		if (old_fence) {
-			radeon_fence_unref(&old_fence);
-		}
-	}
-}
-
 int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
 			     struct vm_area_struct *vma)
 {

+ 2 - 5
drivers/gpu/drm/radeon/radeon_object.h

@@ -126,12 +126,12 @@ static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
 	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
 	if (unlikely(r != 0))
 		return r;
-	spin_lock(&bo->tbo.lock);
+	spin_lock(&bo->tbo.bdev->fence_lock);
 	if (mem_type)
 		*mem_type = bo->tbo.mem.mem_type;
 	if (bo->tbo.sync_obj)
 		r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
-	spin_unlock(&bo->tbo.lock);
+	spin_unlock(&bo->tbo.bdev->fence_lock);
 	ttm_bo_unreserve(&bo->tbo);
 	return r;
 }
@@ -152,10 +152,7 @@ extern int radeon_bo_init(struct radeon_device *rdev);
 extern void radeon_bo_fini(struct radeon_device *rdev);
 extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
 				struct list_head *head);
-extern int radeon_bo_list_reserve(struct list_head *head);
-extern void radeon_bo_list_unreserve(struct list_head *head);
 extern int radeon_bo_list_validate(struct list_head *head);
-extern void radeon_bo_list_fence(struct list_head *head, void *fence);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
 				struct vm_area_struct *vma);
 extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo,

+ 99 - 57
drivers/gpu/drm/ttm/ttm_bo.c

@@ -169,7 +169,7 @@ int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo, bool interruptible)
 }
 EXPORT_SYMBOL(ttm_bo_wait_unreserved);
 
-static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
+void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_mem_type_manager *man;
@@ -191,11 +191,7 @@ static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
 	}
 }
 
-/**
- * Call with the lru_lock held.
- */
-
-static int ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
+int ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
 {
 	int put_count = 0;
 
@@ -227,9 +223,18 @@ int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
 		/**
 		 * Deadlock avoidance for multi-bo reserving.
 		 */
-		if (use_sequence && bo->seq_valid &&
-			(sequence - bo->val_seq < (1 << 31))) {
-			return -EAGAIN;
+		if (use_sequence && bo->seq_valid) {
+			/**
+			 * We've already reserved this one.
+			 */
+			if (unlikely(sequence == bo->val_seq))
+				return -EDEADLK;
+			/**
+			 * Already reserved by a thread that will not back
+			 * off for us. We need to back off.
+			 */
+			if (unlikely(sequence - bo->val_seq < (1 << 31)))
+				return -EAGAIN;
 		}
 
 		if (no_wait)
@@ -267,6 +272,13 @@ static void ttm_bo_ref_bug(struct kref *list_kref)
 	BUG();
 }
 
+void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count,
+			 bool never_free)
+{
+	kref_sub(&bo->list_kref, count,
+		 (never_free) ? ttm_bo_ref_bug : ttm_bo_release_list);
+}
+
 int ttm_bo_reserve(struct ttm_buffer_object *bo,
 		   bool interruptible,
 		   bool no_wait, bool use_sequence, uint32_t sequence)
@@ -282,20 +294,24 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo,
 		put_count = ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
 
-	while (put_count--)
-		kref_put(&bo->list_kref, ttm_bo_ref_bug);
+	ttm_bo_list_ref_sub(bo, put_count, true);
 
 	return ret;
 }
 
+void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo)
+{
+	ttm_bo_add_to_lru(bo);
+	atomic_set(&bo->reserved, 0);
+	wake_up_all(&bo->event_queue);
+}
+
 void ttm_bo_unreserve(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_global *glob = bo->glob;
 
 	spin_lock(&glob->lru_lock);
-	ttm_bo_add_to_lru(bo);
-	atomic_set(&bo->reserved, 0);
-	wake_up_all(&bo->event_queue);
+	ttm_bo_unreserve_locked(bo);
 	spin_unlock(&glob->lru_lock);
 }
 EXPORT_SYMBOL(ttm_bo_unreserve);
@@ -362,8 +378,13 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 	int ret = 0;
 
 	if (old_is_pci || new_is_pci ||
-	    ((mem->placement & bo->mem.placement & TTM_PL_MASK_CACHING) == 0))
-		ttm_bo_unmap_virtual(bo);
+	    ((mem->placement & bo->mem.placement & TTM_PL_MASK_CACHING) == 0)) {
+		ret = ttm_mem_io_lock(old_man, true);
+		if (unlikely(ret != 0))
+			goto out_err;
+		ttm_bo_unmap_virtual_locked(bo);
+		ttm_mem_io_unlock(old_man);
+	}
 
 	/*
 	 * Create and bind a ttm if required.
@@ -416,11 +437,9 @@ moved:
 	}
 
 	if (bo->mem.mm_node) {
-		spin_lock(&bo->lock);
 		bo->offset = (bo->mem.start << PAGE_SHIFT) +
 		    bdev->man[bo->mem.mem_type].gpu_offset;
 		bo->cur_placement = bo->mem.placement;
-		spin_unlock(&bo->lock);
 	} else
 		bo->offset = 0;
 
@@ -452,7 +471,6 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 		ttm_tt_destroy(bo->ttm);
 		bo->ttm = NULL;
 	}
-
 	ttm_bo_mem_put(bo, &bo->mem);
 
 	atomic_set(&bo->reserved, 0);
@@ -474,14 +492,14 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 	int put_count;
 	int ret;
 
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	(void) ttm_bo_wait(bo, false, false, true);
 	if (!bo->sync_obj) {
 
 		spin_lock(&glob->lru_lock);
 
 		/**
-		 * Lock inversion between bo::reserve and bo::lock here,
+		 * Lock inversion between bo:reserve and bdev::fence_lock here,
 		 * but that's OK, since we're only trylocking.
 		 */
 
@@ -490,14 +508,13 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 		if (unlikely(ret == -EBUSY))
 			goto queue;
 
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 		put_count = ttm_bo_del_from_lru(bo);
 
 		spin_unlock(&glob->lru_lock);
 		ttm_bo_cleanup_memtype_use(bo);
 
-		while (put_count--)
-			kref_put(&bo->list_kref, ttm_bo_ref_bug);
+		ttm_bo_list_ref_sub(bo, put_count, true);
 
 		return;
 	} else {
@@ -512,7 +529,7 @@ queue:
 	kref_get(&bo->list_kref);
 	list_add_tail(&bo->ddestroy, &bdev->ddestroy);
 	spin_unlock(&glob->lru_lock);
-	spin_unlock(&bo->lock);
+	spin_unlock(&bdev->fence_lock);
 
 	if (sync_obj) {
 		driver->sync_obj_flush(sync_obj, sync_obj_arg);
@@ -537,14 +554,15 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 			       bool no_wait_reserve,
 			       bool no_wait_gpu)
 {
+	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_bo_global *glob = bo->glob;
 	int put_count;
 	int ret = 0;
 
 retry:
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bo->lock);
+	spin_unlock(&bdev->fence_lock);
 
 	if (unlikely(ret != 0))
 		return ret;
@@ -580,8 +598,7 @@ retry:
 	spin_unlock(&glob->lru_lock);
 	ttm_bo_cleanup_memtype_use(bo);
 
-	while (put_count--)
-		kref_put(&bo->list_kref, ttm_bo_ref_bug);
+	ttm_bo_list_ref_sub(bo, put_count, true);
 
 	return 0;
 }
@@ -652,6 +669,7 @@ static void ttm_bo_release(struct kref *kref)
 	struct ttm_buffer_object *bo =
 	    container_of(kref, struct ttm_buffer_object, kref);
 	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
 
 	if (likely(bo->vm_node != NULL)) {
 		rb_erase(&bo->vm_rb, &bdev->addr_space_rb);
@@ -659,6 +677,9 @@ static void ttm_bo_release(struct kref *kref)
 		bo->vm_node = NULL;
 	}
 	write_unlock(&bdev->vm_lock);
+	ttm_mem_io_lock(man, false);
+	ttm_mem_io_free_vm(bo);
+	ttm_mem_io_unlock(man);
 	ttm_bo_cleanup_refs_or_queue(bo);
 	kref_put(&bo->list_kref, ttm_bo_release_list);
 	write_lock(&bdev->vm_lock);
@@ -698,9 +719,9 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 	struct ttm_placement placement;
 	int ret = 0;
 
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bo->lock);
+	spin_unlock(&bdev->fence_lock);
 
 	if (unlikely(ret != 0)) {
 		if (ret != -ERESTARTSYS) {
@@ -715,7 +736,8 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 
 	evict_mem = bo->mem;
 	evict_mem.mm_node = NULL;
-	evict_mem.bus.io_reserved = false;
+	evict_mem.bus.io_reserved_vm = false;
+	evict_mem.bus.io_reserved_count = 0;
 
 	placement.fpfn = 0;
 	placement.lpfn = 0;
@@ -802,8 +824,7 @@ retry:
 
 	BUG_ON(ret != 0);
 
-	while (put_count--)
-		kref_put(&bo->list_kref, ttm_bo_ref_bug);
+	ttm_bo_list_ref_sub(bo, put_count, true);
 
 	ret = ttm_bo_evict(bo, interruptible, no_wait_reserve, no_wait_gpu);
 	ttm_bo_unreserve(bo);
@@ -1036,6 +1057,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 {
 	int ret = 0;
 	struct ttm_mem_reg mem;
+	struct ttm_bo_device *bdev = bo->bdev;
 
 	BUG_ON(!atomic_read(&bo->reserved));
 
@@ -1044,15 +1066,16 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 	 * Have the driver move function wait for idle when necessary,
 	 * instead of doing it here.
 	 */
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bo->lock);
+	spin_unlock(&bdev->fence_lock);
 	if (ret)
 		return ret;
 	mem.num_pages = bo->num_pages;
 	mem.size = mem.num_pages << PAGE_SHIFT;
 	mem.page_alignment = bo->mem.page_alignment;
-	mem.bus.io_reserved = false;
+	mem.bus.io_reserved_vm = false;
+	mem.bus.io_reserved_count = 0;
 	/*
 	 * Determine where to move the buffer.
 	 */
@@ -1163,7 +1186,6 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 	}
 	bo->destroy = destroy;
 
-	spin_lock_init(&bo->lock);
 	kref_init(&bo->kref);
 	kref_init(&bo->list_kref);
 	atomic_set(&bo->cpu_writers, 0);
@@ -1172,6 +1194,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 	INIT_LIST_HEAD(&bo->lru);
 	INIT_LIST_HEAD(&bo->ddestroy);
 	INIT_LIST_HEAD(&bo->swap);
+	INIT_LIST_HEAD(&bo->io_reserve_lru);
 	bo->bdev = bdev;
 	bo->glob = bdev->glob;
 	bo->type = type;
@@ -1181,7 +1204,8 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 	bo->mem.num_pages = bo->num_pages;
 	bo->mem.mm_node = NULL;
 	bo->mem.page_alignment = page_alignment;
-	bo->mem.bus.io_reserved = false;
+	bo->mem.bus.io_reserved_vm = false;
+	bo->mem.bus.io_reserved_count = 0;
 	bo->buffer_start = buffer_start & PAGE_MASK;
 	bo->priv_flags = 0;
 	bo->mem.placement = (TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED);
@@ -1355,6 +1379,10 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 	BUG_ON(type >= TTM_NUM_MEM_TYPES);
 	man = &bdev->man[type];
 	BUG_ON(man->has_type);
+	man->io_reserve_fastpath = true;
+	man->use_io_reserve_lru = false;
+	mutex_init(&man->io_reserve_mutex);
+	INIT_LIST_HEAD(&man->io_reserve_lru);
 
 	ret = bdev->driver->init_mem_type(bdev, type, man);
 	if (ret)
@@ -1527,7 +1555,8 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 	bdev->dev_mapping = NULL;
 	bdev->glob = glob;
 	bdev->need_dma32 = need_dma32;
-
+	bdev->val_seq = 0;
+	spin_lock_init(&bdev->fence_lock);
 	mutex_lock(&glob->device_list_mutex);
 	list_add_tail(&bdev->device_list, &glob->device_list);
 	mutex_unlock(&glob->device_list_mutex);
@@ -1561,7 +1590,7 @@ bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 	return true;
 }
 
-void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
+void ttm_bo_unmap_virtual_locked(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	loff_t offset = (loff_t) bo->addr_space_offset;
@@ -1570,8 +1599,20 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
 	if (!bdev->dev_mapping)
 		return;
 	unmap_mapping_range(bdev->dev_mapping, offset, holelen, 1);
-	ttm_mem_io_free(bdev, &bo->mem);
+	ttm_mem_io_free_vm(bo);
+}
+
+void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
+
+	ttm_mem_io_lock(man, false);
+	ttm_bo_unmap_virtual_locked(bo);
+	ttm_mem_io_unlock(man);
 }
+
+
 EXPORT_SYMBOL(ttm_bo_unmap_virtual);
 
 static void ttm_bo_vm_insert_rb(struct ttm_buffer_object *bo)
@@ -1651,6 +1692,7 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 		bool lazy, bool interruptible, bool no_wait)
 {
 	struct ttm_bo_driver *driver = bo->bdev->driver;
+	struct ttm_bo_device *bdev = bo->bdev;
 	void *sync_obj;
 	void *sync_obj_arg;
 	int ret = 0;
@@ -1664,9 +1706,9 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 			void *tmp_obj = bo->sync_obj;
 			bo->sync_obj = NULL;
 			clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-			spin_unlock(&bo->lock);
+			spin_unlock(&bdev->fence_lock);
 			driver->sync_obj_unref(&tmp_obj);
-			spin_lock(&bo->lock);
+			spin_lock(&bdev->fence_lock);
 			continue;
 		}
 
@@ -1675,29 +1717,29 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 
 		sync_obj = driver->sync_obj_ref(bo->sync_obj);
 		sync_obj_arg = bo->sync_obj_arg;
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 		ret = driver->sync_obj_wait(sync_obj, sync_obj_arg,
 					    lazy, interruptible);
 		if (unlikely(ret != 0)) {
 			driver->sync_obj_unref(&sync_obj);
-			spin_lock(&bo->lock);
+			spin_lock(&bdev->fence_lock);
 			return ret;
 		}
-		spin_lock(&bo->lock);
+		spin_lock(&bdev->fence_lock);
 		if (likely(bo->sync_obj == sync_obj &&
 			   bo->sync_obj_arg == sync_obj_arg)) {
 			void *tmp_obj = bo->sync_obj;
 			bo->sync_obj = NULL;
 			clear_bit(TTM_BO_PRIV_FLAG_MOVING,
 				  &bo->priv_flags);
-			spin_unlock(&bo->lock);
+			spin_unlock(&bdev->fence_lock);
 			driver->sync_obj_unref(&sync_obj);
 			driver->sync_obj_unref(&tmp_obj);
-			spin_lock(&bo->lock);
+			spin_lock(&bdev->fence_lock);
 		} else {
-			spin_unlock(&bo->lock);
+			spin_unlock(&bdev->fence_lock);
 			driver->sync_obj_unref(&sync_obj);
-			spin_lock(&bo->lock);
+			spin_lock(&bdev->fence_lock);
 		}
 	}
 	return 0;
@@ -1706,6 +1748,7 @@ EXPORT_SYMBOL(ttm_bo_wait);
 
 int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
 {
+	struct ttm_bo_device *bdev = bo->bdev;
 	int ret = 0;
 
 	/*
@@ -1715,9 +1758,9 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
 	ret = ttm_bo_reserve(bo, true, no_wait, false, 0);
 	if (unlikely(ret != 0))
 		return ret;
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, true, no_wait);
-	spin_unlock(&bo->lock);
+	spin_unlock(&bdev->fence_lock);
 	if (likely(ret == 0))
 		atomic_inc(&bo->cpu_writers);
 	ttm_bo_unreserve(bo);
@@ -1783,16 +1826,15 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	put_count = ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
 
-	while (put_count--)
-		kref_put(&bo->list_kref, ttm_bo_ref_bug);
+	ttm_bo_list_ref_sub(bo, put_count, true);
 
 	/**
 	 * Wait for GPU, then move to system cached.
 	 */
 
-	spin_lock(&bo->lock);
+	spin_lock(&bo->bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, false, false);
-	spin_unlock(&bo->lock);
+	spin_unlock(&bo->bdev->fence_lock);
 
 	if (unlikely(ret != 0))
 		goto out;

+ 118 - 18
drivers/gpu/drm/ttm/ttm_bo_util.c

@@ -75,37 +75,123 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_move_ttm);
 
-int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+int ttm_mem_io_lock(struct ttm_mem_type_manager *man, bool interruptible)
 {
-	int ret;
+	if (likely(man->io_reserve_fastpath))
+		return 0;
+
+	if (interruptible)
+		return mutex_lock_interruptible(&man->io_reserve_mutex);
+
+	mutex_lock(&man->io_reserve_mutex);
+	return 0;
+}
 
-	if (!mem->bus.io_reserved) {
-		mem->bus.io_reserved = true;
+void ttm_mem_io_unlock(struct ttm_mem_type_manager *man)
+{
+	if (likely(man->io_reserve_fastpath))
+		return;
+
+	mutex_unlock(&man->io_reserve_mutex);
+}
+
+static int ttm_mem_io_evict(struct ttm_mem_type_manager *man)
+{
+	struct ttm_buffer_object *bo;
+
+	if (!man->use_io_reserve_lru || list_empty(&man->io_reserve_lru))
+		return -EAGAIN;
+
+	bo = list_first_entry(&man->io_reserve_lru,
+			      struct ttm_buffer_object,
+			      io_reserve_lru);
+	list_del_init(&bo->io_reserve_lru);
+	ttm_bo_unmap_virtual_locked(bo);
+
+	return 0;
+}
+
+static int ttm_mem_io_reserve(struct ttm_bo_device *bdev,
+			      struct ttm_mem_reg *mem)
+{
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+	int ret = 0;
+
+	if (!bdev->driver->io_mem_reserve)
+		return 0;
+	if (likely(man->io_reserve_fastpath))
+		return bdev->driver->io_mem_reserve(bdev, mem);
+
+	if (bdev->driver->io_mem_reserve &&
+	    mem->bus.io_reserved_count++ == 0) {
+retry:
 		ret = bdev->driver->io_mem_reserve(bdev, mem);
+		if (ret == -EAGAIN) {
+			ret = ttm_mem_io_evict(man);
+			if (ret == 0)
+				goto retry;
+		}
+	}
+	return ret;
+}
+
+static void ttm_mem_io_free(struct ttm_bo_device *bdev,
+			    struct ttm_mem_reg *mem)
+{
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+
+	if (likely(man->io_reserve_fastpath))
+		return;
+
+	if (bdev->driver->io_mem_reserve &&
+	    --mem->bus.io_reserved_count == 0 &&
+	    bdev->driver->io_mem_free)
+		bdev->driver->io_mem_free(bdev, mem);
+
+}
+
+int ttm_mem_io_reserve_vm(struct ttm_buffer_object *bo)
+{
+	struct ttm_mem_reg *mem = &bo->mem;
+	int ret;
+
+	if (!mem->bus.io_reserved_vm) {
+		struct ttm_mem_type_manager *man =
+			&bo->bdev->man[mem->mem_type];
+
+		ret = ttm_mem_io_reserve(bo->bdev, mem);
 		if (unlikely(ret != 0))
 			return ret;
+		mem->bus.io_reserved_vm = true;
+		if (man->use_io_reserve_lru)
+			list_add_tail(&bo->io_reserve_lru,
+				      &man->io_reserve_lru);
 	}
 	return 0;
 }
 
-void ttm_mem_io_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+void ttm_mem_io_free_vm(struct ttm_buffer_object *bo)
 {
-	if (bdev->driver->io_mem_reserve) {
-		if (mem->bus.io_reserved) {
-			mem->bus.io_reserved = false;
-			bdev->driver->io_mem_free(bdev, mem);
-		}
+	struct ttm_mem_reg *mem = &bo->mem;
+
+	if (mem->bus.io_reserved_vm) {
+		mem->bus.io_reserved_vm = false;
+		list_del_init(&bo->io_reserve_lru);
+		ttm_mem_io_free(bo->bdev, mem);
 	}
 }
 
 int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
 			void **virtual)
 {
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
 	int ret;
 	void *addr;
 
 	*virtual = NULL;
+	(void) ttm_mem_io_lock(man, false);
 	ret = ttm_mem_io_reserve(bdev, mem);
+	ttm_mem_io_unlock(man);
 	if (ret || !mem->bus.is_iomem)
 		return ret;
 
@@ -117,7 +203,9 @@ int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
 		else
 			addr = ioremap_nocache(mem->bus.base + mem->bus.offset, mem->bus.size);
 		if (!addr) {
+			(void) ttm_mem_io_lock(man, false);
 			ttm_mem_io_free(bdev, mem);
+			ttm_mem_io_unlock(man);
 			return -ENOMEM;
 		}
 	}
@@ -134,7 +222,9 @@ void ttm_mem_reg_iounmap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
 
 	if (virtual && mem->bus.addr == NULL)
 		iounmap(virtual);
+	(void) ttm_mem_io_lock(man, false);
 	ttm_mem_io_free(bdev, mem);
+	ttm_mem_io_unlock(man);
 }
 
 static int ttm_copy_io_page(void *dst, void *src, unsigned long page)
@@ -231,7 +321,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	struct ttm_mem_type_manager *man = &bdev->man[new_mem->mem_type];
 	struct ttm_tt *ttm = bo->ttm;
 	struct ttm_mem_reg *old_mem = &bo->mem;
-	struct ttm_mem_reg old_copy = *old_mem;
+	struct ttm_mem_reg old_copy;
 	void *old_iomap;
 	void *new_iomap;
 	int ret;
@@ -281,7 +371,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	mb();
 out2:
 	ttm_bo_free_old_node(bo);
-
+	old_copy = *old_mem;
 	*old_mem = *new_mem;
 	new_mem->mm_node = NULL;
 
@@ -292,7 +382,7 @@ out2:
 	}
 
 out1:
-	ttm_mem_reg_iounmap(bdev, new_mem, new_iomap);
+	ttm_mem_reg_iounmap(bdev, old_mem, new_iomap);
 out:
 	ttm_mem_reg_iounmap(bdev, &old_copy, old_iomap);
 	return ret;
@@ -337,11 +427,11 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	 * TODO: Explicit member copy would probably be better here.
 	 */
 
-	spin_lock_init(&fbo->lock);
 	init_waitqueue_head(&fbo->event_queue);
 	INIT_LIST_HEAD(&fbo->ddestroy);
 	INIT_LIST_HEAD(&fbo->lru);
 	INIT_LIST_HEAD(&fbo->swap);
+	INIT_LIST_HEAD(&fbo->io_reserve_lru);
 	fbo->vm_node = NULL;
 	atomic_set(&fbo->cpu_writers, 0);
 
@@ -453,6 +543,8 @@ int ttm_bo_kmap(struct ttm_buffer_object *bo,
 		unsigned long start_page, unsigned long num_pages,
 		struct ttm_bo_kmap_obj *map)
 {
+	struct ttm_mem_type_manager *man =
+		&bo->bdev->man[bo->mem.mem_type];
 	unsigned long offset, size;
 	int ret;
 
@@ -467,7 +559,9 @@ int ttm_bo_kmap(struct ttm_buffer_object *bo,
 	if (num_pages > 1 && !DRM_SUSER(DRM_CURPROC))
 		return -EPERM;
 #endif
+	(void) ttm_mem_io_lock(man, false);
 	ret = ttm_mem_io_reserve(bo->bdev, &bo->mem);
+	ttm_mem_io_unlock(man);
 	if (ret)
 		return ret;
 	if (!bo->mem.bus.is_iomem) {
@@ -482,12 +576,15 @@ EXPORT_SYMBOL(ttm_bo_kmap);
 
 void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map)
 {
+	struct ttm_buffer_object *bo = map->bo;
+	struct ttm_mem_type_manager *man =
+		&bo->bdev->man[bo->mem.mem_type];
+
 	if (!map->virtual)
 		return;
 	switch (map->bo_kmap_type) {
 	case ttm_bo_map_iomap:
 		iounmap(map->virtual);
-		ttm_mem_io_free(map->bo->bdev, &map->bo->mem);
 		break;
 	case ttm_bo_map_vmap:
 		vunmap(map->virtual);
@@ -500,6 +597,9 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map)
 	default:
 		BUG();
 	}
+	(void) ttm_mem_io_lock(man, false);
+	ttm_mem_io_free(map->bo->bdev, &map->bo->mem);
+	ttm_mem_io_unlock(man);
 	map->virtual = NULL;
 	map->page = NULL;
 }
@@ -520,7 +620,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	struct ttm_buffer_object *ghost_obj;
 	void *tmp_obj = NULL;
 
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	if (bo->sync_obj) {
 		tmp_obj = bo->sync_obj;
 		bo->sync_obj = NULL;
@@ -529,7 +629,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	bo->sync_obj_arg = sync_obj_arg;
 	if (evict) {
 		ret = ttm_bo_wait(bo, false, false, false);
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 		if (tmp_obj)
 			driver->sync_obj_unref(&tmp_obj);
 		if (ret)
@@ -552,7 +652,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		 */
 
 		set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 		if (tmp_obj)
 			driver->sync_obj_unref(&tmp_obj);
 

+ 18 - 11
drivers/gpu/drm/ttm/ttm_bo_vm.c

@@ -83,6 +83,8 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	int i;
 	unsigned long address = (unsigned long)vmf->virtual_address;
 	int retval = VM_FAULT_NOPAGE;
+	struct ttm_mem_type_manager *man =
+		&bdev->man[bo->mem.mem_type];
 
 	/*
 	 * Work around locking order reversal in fault / nopfn
@@ -118,24 +120,28 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * move.
 	 */
 
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
 		ret = ttm_bo_wait(bo, false, true, false);
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 		if (unlikely(ret != 0)) {
 			retval = (ret != -ERESTARTSYS) ?
 			    VM_FAULT_SIGBUS : VM_FAULT_NOPAGE;
 			goto out_unlock;
 		}
 	} else
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 
-
-	ret = ttm_mem_io_reserve(bdev, &bo->mem);
-	if (ret) {
-		retval = VM_FAULT_SIGBUS;
+	ret = ttm_mem_io_lock(man, true);
+	if (unlikely(ret != 0)) {
+		retval = VM_FAULT_NOPAGE;
 		goto out_unlock;
 	}
+	ret = ttm_mem_io_reserve_vm(bo);
+	if (unlikely(ret != 0)) {
+		retval = VM_FAULT_SIGBUS;
+		goto out_io_unlock;
+	}
 
 	page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) +
 	    bo->vm_node->start - vma->vm_pgoff;
@@ -144,7 +150,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	if (unlikely(page_offset >= bo->num_pages)) {
 		retval = VM_FAULT_SIGBUS;
-		goto out_unlock;
+		goto out_io_unlock;
 	}
 
 	/*
@@ -182,7 +188,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 			page = ttm_tt_get_page(ttm, page_offset);
 			if (unlikely(!page && i == 0)) {
 				retval = VM_FAULT_OOM;
-				goto out_unlock;
+				goto out_io_unlock;
 			} else if (unlikely(!page)) {
 				break;
 			}
@@ -200,14 +206,15 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		else if (unlikely(ret != 0)) {
 			retval =
 			    (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
-			goto out_unlock;
+			goto out_io_unlock;
 		}
 
 		address += PAGE_SIZE;
 		if (unlikely(++page_offset >= page_last))
 			break;
 	}
-
+out_io_unlock:
+	ttm_mem_io_unlock(man);
 out_unlock:
 	ttm_bo_unreserve(bo);
 	return retval;

+ 145 - 24
drivers/gpu/drm/ttm/ttm_execbuf_util.c

@@ -32,7 +32,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 
-void ttm_eu_backoff_reservation(struct list_head *list)
+static void ttm_eu_backoff_reservation_locked(struct list_head *list)
 {
 	struct ttm_validate_buffer *entry;
 
@@ -41,10 +41,77 @@ void ttm_eu_backoff_reservation(struct list_head *list)
 		if (!entry->reserved)
 			continue;
 
+		if (entry->removed) {
+			ttm_bo_add_to_lru(bo);
+			entry->removed = false;
+
+		}
 		entry->reserved = false;
-		ttm_bo_unreserve(bo);
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
+	}
+}
+
+static void ttm_eu_del_from_lru_locked(struct list_head *list)
+{
+	struct ttm_validate_buffer *entry;
+
+	list_for_each_entry(entry, list, head) {
+		struct ttm_buffer_object *bo = entry->bo;
+		if (!entry->reserved)
+			continue;
+
+		if (!entry->removed) {
+			entry->put_count = ttm_bo_del_from_lru(bo);
+			entry->removed = true;
+		}
 	}
 }
+
+static void ttm_eu_list_ref_sub(struct list_head *list)
+{
+	struct ttm_validate_buffer *entry;
+
+	list_for_each_entry(entry, list, head) {
+		struct ttm_buffer_object *bo = entry->bo;
+
+		if (entry->put_count) {
+			ttm_bo_list_ref_sub(bo, entry->put_count, true);
+			entry->put_count = 0;
+		}
+	}
+}
+
+static int ttm_eu_wait_unreserved_locked(struct list_head *list,
+					 struct ttm_buffer_object *bo)
+{
+	struct ttm_bo_global *glob = bo->glob;
+	int ret;
+
+	ttm_eu_del_from_lru_locked(list);
+	spin_unlock(&glob->lru_lock);
+	ret = ttm_bo_wait_unreserved(bo, true);
+	spin_lock(&glob->lru_lock);
+	if (unlikely(ret != 0))
+		ttm_eu_backoff_reservation_locked(list);
+	return ret;
+}
+
+
+void ttm_eu_backoff_reservation(struct list_head *list)
+{
+	struct ttm_validate_buffer *entry;
+	struct ttm_bo_global *glob;
+
+	if (list_empty(list))
+		return;
+
+	entry = list_first_entry(list, struct ttm_validate_buffer, head);
+	glob = entry->bo->glob;
+	spin_lock(&glob->lru_lock);
+	ttm_eu_backoff_reservation_locked(list);
+	spin_unlock(&glob->lru_lock);
+}
 EXPORT_SYMBOL(ttm_eu_backoff_reservation);
 
 /*
@@ -59,37 +126,76 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation);
  * buffers in different orders.
  */
 
-int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq)
+int ttm_eu_reserve_buffers(struct list_head *list)
 {
+	struct ttm_bo_global *glob;
 	struct ttm_validate_buffer *entry;
 	int ret;
+	uint32_t val_seq;
+
+	if (list_empty(list))
+		return 0;
+
+	list_for_each_entry(entry, list, head) {
+		entry->reserved = false;
+		entry->put_count = 0;
+		entry->removed = false;
+	}
+
+	entry = list_first_entry(list, struct ttm_validate_buffer, head);
+	glob = entry->bo->glob;
 
 retry:
+	spin_lock(&glob->lru_lock);
+	val_seq = entry->bo->bdev->val_seq++;
+
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
-		entry->reserved = false;
-		ret = ttm_bo_reserve(bo, true, false, true, val_seq);
-		if (ret != 0) {
-			ttm_eu_backoff_reservation(list);
-			if (ret == -EAGAIN) {
-				ret = ttm_bo_wait_unreserved(bo, true);
-				if (unlikely(ret != 0))
-					return ret;
-				goto retry;
-			} else
+retry_this_bo:
+		ret = ttm_bo_reserve_locked(bo, true, true, true, val_seq);
+		switch (ret) {
+		case 0:
+			break;
+		case -EBUSY:
+			ret = ttm_eu_wait_unreserved_locked(list, bo);
+			if (unlikely(ret != 0)) {
+				spin_unlock(&glob->lru_lock);
+				ttm_eu_list_ref_sub(list);
 				return ret;
+			}
+			goto retry_this_bo;
+		case -EAGAIN:
+			ttm_eu_backoff_reservation_locked(list);
+			spin_unlock(&glob->lru_lock);
+			ttm_eu_list_ref_sub(list);
+			ret = ttm_bo_wait_unreserved(bo, true);
+			if (unlikely(ret != 0))
+				return ret;
+			goto retry;
+		default:
+			ttm_eu_backoff_reservation_locked(list);
+			spin_unlock(&glob->lru_lock);
+			ttm_eu_list_ref_sub(list);
+			return ret;
 		}
 
 		entry->reserved = true;
 		if (unlikely(atomic_read(&bo->cpu_writers) > 0)) {
-			ttm_eu_backoff_reservation(list);
+			ttm_eu_backoff_reservation_locked(list);
+			spin_unlock(&glob->lru_lock);
+			ttm_eu_list_ref_sub(list);
 			ret = ttm_bo_wait_cpu(bo, false);
 			if (ret)
 				return ret;
 			goto retry;
 		}
 	}
+
+	ttm_eu_del_from_lru_locked(list);
+	spin_unlock(&glob->lru_lock);
+	ttm_eu_list_ref_sub(list);
+
 	return 0;
 }
 EXPORT_SYMBOL(ttm_eu_reserve_buffers);
@@ -97,21 +203,36 @@ EXPORT_SYMBOL(ttm_eu_reserve_buffers);
 void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
 {
 	struct ttm_validate_buffer *entry;
+	struct ttm_buffer_object *bo;
+	struct ttm_bo_global *glob;
+	struct ttm_bo_device *bdev;
+	struct ttm_bo_driver *driver;
 
-	list_for_each_entry(entry, list, head) {
-		struct ttm_buffer_object *bo = entry->bo;
-		struct ttm_bo_driver *driver = bo->bdev->driver;
-		void *old_sync_obj;
+	if (list_empty(list))
+		return;
+
+	bo = list_first_entry(list, struct ttm_validate_buffer, head)->bo;
+	bdev = bo->bdev;
+	driver = bdev->driver;
+	glob = bo->glob;
 
-		spin_lock(&bo->lock);
-		old_sync_obj = bo->sync_obj;
+	spin_lock(&bdev->fence_lock);
+	spin_lock(&glob->lru_lock);
+
+	list_for_each_entry(entry, list, head) {
+		bo = entry->bo;
+		entry->old_sync_obj = bo->sync_obj;
 		bo->sync_obj = driver->sync_obj_ref(sync_obj);
 		bo->sync_obj_arg = entry->new_sync_obj_arg;
-		spin_unlock(&bo->lock);
-		ttm_bo_unreserve(bo);
+		ttm_bo_unreserve_locked(bo);
 		entry->reserved = false;
-		if (old_sync_obj)
-			driver->sync_obj_unref(&old_sync_obj);
+	}
+	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->fence_lock);
+
+	list_for_each_entry(entry, list, head) {
+		if (entry->old_sync_obj)
+			driver->sync_obj_unref(&entry->old_sync_obj);
 	}
 }
 EXPORT_SYMBOL(ttm_eu_fence_buffer_objects);

+ 0 - 1
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h

@@ -264,7 +264,6 @@ struct vmw_private {
 	 */
 
 	struct vmw_sw_context ctx;
-	uint32_t val_seq;
 	struct mutex cmdbuf_mutex;
 
 	/**

+ 1 - 2
drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c

@@ -653,8 +653,7 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 	ret = vmw_cmd_check_all(dev_priv, sw_context, cmd, arg->command_size);
 	if (unlikely(ret != 0))
 		goto out_err;
-	ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes,
-				     dev_priv->val_seq++);
+	ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes);
 	if (unlikely(ret != 0))
 		goto out_err;
 

+ 45 - 5
include/drm/ttm/ttm_bo_api.h

@@ -74,6 +74,8 @@ struct ttm_placement {
  * @is_iomem:		is this io memory ?
  * @size:		size in byte
  * @offset:		offset from the base address
+ * @io_reserved_vm:     The VM system has a refcount in @io_reserved_count
+ * @io_reserved_count:  Refcounting the numbers of callers to ttm_mem_io_reserve
  *
  * Structure indicating the bus placement of an object.
  */
@@ -83,7 +85,8 @@ struct ttm_bus_placement {
 	unsigned long	size;
 	unsigned long	offset;
 	bool		is_iomem;
-	bool		io_reserved;
+	bool		io_reserved_vm;
+	uint64_t        io_reserved_count;
 };
 
 
@@ -154,7 +157,6 @@ struct ttm_tt;
  * keeps one refcount. When this refcount reaches zero,
  * the object is destroyed.
  * @event_queue: Queue for processes waiting on buffer object status change.
- * @lock: spinlock protecting mostly synchronization members.
  * @mem: structure describing current placement.
  * @persistant_swap_storage: Usually the swap storage is deleted for buffers
  * pinned in physical memory. If this behaviour is not desired, this member
@@ -213,7 +215,6 @@ struct ttm_buffer_object {
 	struct kref kref;
 	struct kref list_kref;
 	wait_queue_head_t event_queue;
-	spinlock_t lock;
 
 	/**
 	 * Members protected by the bo::reserved lock.
@@ -237,6 +238,7 @@ struct ttm_buffer_object {
 	struct list_head lru;
 	struct list_head ddestroy;
 	struct list_head swap;
+	struct list_head io_reserve_lru;
 	uint32_t val_seq;
 	bool seq_valid;
 
@@ -248,10 +250,10 @@ struct ttm_buffer_object {
 	atomic_t reserved;
 
 	/**
-	 * Members protected by the bo::lock
+	 * Members protected by struct buffer_object_device::fence_lock
 	 * In addition, setting sync_obj to anything else
 	 * than NULL requires bo::reserved to be held. This allows for
-	 * checking NULL while reserved but not holding bo::lock.
+	 * checking NULL while reserved but not holding the mentioned lock.
 	 */
 
 	void *sync_obj_arg;
@@ -364,6 +366,44 @@ extern int ttm_bo_validate(struct ttm_buffer_object *bo,
  */
 extern void ttm_bo_unref(struct ttm_buffer_object **bo);
 
+
+/**
+ * ttm_bo_list_ref_sub
+ *
+ * @bo: The buffer object.
+ * @count: The number of references with which to decrease @bo::list_kref;
+ * @never_free: The refcount should not reach zero with this operation.
+ *
+ * Release @count lru list references to this buffer object.
+ */
+extern void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count,
+				bool never_free);
+
+/**
+ * ttm_bo_add_to_lru
+ *
+ * @bo: The buffer object.
+ *
+ * Add this bo to the relevant mem type lru and, if it's backed by
+ * system pages (ttms) to the swap list.
+ * This function must be called with struct ttm_bo_global::lru_lock held, and
+ * is typically called immediately prior to unreserving a bo.
+ */
+extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo);
+
+/**
+ * ttm_bo_del_from_lru
+ *
+ * @bo: The buffer object.
+ *
+ * Remove this bo from all lru lists used to lookup and reserve an object.
+ * This function must be called with struct ttm_bo_global::lru_lock held,
+ * and is usually called just immediately after the bo has been reserved to
+ * avoid recursive reservation from lru lists.
+ */
+extern int ttm_bo_del_from_lru(struct ttm_buffer_object *bo);
+
+
 /**
  * ttm_bo_lock_delayed_workqueue
  *

+ 103 - 49
include/drm/ttm/ttm_bo_driver.h

@@ -179,30 +179,6 @@ struct ttm_tt {
 #define TTM_MEMTYPE_FLAG_MAPPABLE      (1 << 1)	/* Memory mappable */
 #define TTM_MEMTYPE_FLAG_CMA           (1 << 3)	/* Can't map aperture */
 
-/**
- * struct ttm_mem_type_manager
- *
- * @has_type: The memory type has been initialized.
- * @use_type: The memory type is enabled.
- * @flags: TTM_MEMTYPE_XX flags identifying the traits of the memory
- * managed by this memory type.
- * @gpu_offset: If used, the GPU offset of the first managed page of
- * fixed memory or the first managed location in an aperture.
- * @size: Size of the managed region.
- * @available_caching: A mask of available caching types, TTM_PL_FLAG_XX,
- * as defined in ttm_placement_common.h
- * @default_caching: The default caching policy used for a buffer object
- * placed in this memory type if the user doesn't provide one.
- * @manager: The range manager used for this memory type. FIXME: If the aperture
- * has a page size different from the underlying system, the granularity
- * of this manager should take care of this. But the range allocating code
- * in ttm_bo.c needs to be modified for this.
- * @lru: The lru list for this memory type.
- *
- * This structure is used to identify and manage memory types for a device.
- * It's set up by the ttm_bo_driver::init_mem_type method.
- */
-
 struct ttm_mem_type_manager;
 
 struct ttm_mem_type_manager_func {
@@ -287,6 +263,36 @@ struct ttm_mem_type_manager_func {
 	void (*debug)(struct ttm_mem_type_manager *man, const char *prefix);
 };
 
+/**
+ * struct ttm_mem_type_manager
+ *
+ * @has_type: The memory type has been initialized.
+ * @use_type: The memory type is enabled.
+ * @flags: TTM_MEMTYPE_XX flags identifying the traits of the memory
+ * managed by this memory type.
+ * @gpu_offset: If used, the GPU offset of the first managed page of
+ * fixed memory or the first managed location in an aperture.
+ * @size: Size of the managed region.
+ * @available_caching: A mask of available caching types, TTM_PL_FLAG_XX,
+ * as defined in ttm_placement_common.h
+ * @default_caching: The default caching policy used for a buffer object
+ * placed in this memory type if the user doesn't provide one.
+ * @func: structure pointer implementing the range manager. See above
+ * @priv: Driver private closure for @func.
+ * @io_reserve_mutex: Mutex optionally protecting shared io_reserve structures
+ * @use_io_reserve_lru: Use an lru list to try to unreserve io_mem_regions
+ * reserved by the TTM vm system.
+ * @io_reserve_lru: Optional lru list for unreserving io mem regions.
+ * @io_reserve_fastpath: Only use bdev::driver::io_mem_reserve to obtain
+ * static information. bdev::driver::io_mem_free is never used.
+ * @lru: The lru list for this memory type.
+ *
+ * This structure is used to identify and manage memory types for a device.
+ * It's set up by the ttm_bo_driver::init_mem_type method.
+ */
+
+
+
 struct ttm_mem_type_manager {
 	struct ttm_bo_device *bdev;
 
@@ -303,6 +309,15 @@ struct ttm_mem_type_manager {
 	uint32_t default_caching;
 	const struct ttm_mem_type_manager_func *func;
 	void *priv;
+	struct mutex io_reserve_mutex;
+	bool use_io_reserve_lru;
+	bool io_reserve_fastpath;
+
+	/*
+	 * Protected by @io_reserve_mutex:
+	 */
+
+	struct list_head io_reserve_lru;
 
 	/*
 	 * Protected by the global->lru_lock.
@@ -510,9 +525,12 @@ struct ttm_bo_global {
  *
  * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
  * @man: An array of mem_type_managers.
+ * @fence_lock: Protects the synchronizing members on *all* bos belonging
+ * to this device.
  * @addr_space_mm: Range manager for the device address space.
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
+ * @val_seq: Current validation sequence.
  * @nice_mode: Try nicely to wait for buffer idle when cleaning a manager.
  * If a GPU lockup has been detected, this is forced to 0.
  * @dev_mapping: A pointer to the struct address_space representing the
@@ -531,6 +549,7 @@ struct ttm_bo_device {
 	struct ttm_bo_driver *driver;
 	rwlock_t vm_lock;
 	struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
+	spinlock_t fence_lock;
 	/*
 	 * Protected by the vm lock.
 	 */
@@ -541,6 +560,7 @@ struct ttm_bo_device {
 	 * Protected by the global:lru lock.
 	 */
 	struct list_head ddestroy;
+	uint32_t val_seq;
 
 	/*
 	 * Protected by load / firstopen / lastclose /unload sync.
@@ -753,31 +773,6 @@ extern void ttm_bo_mem_put_locked(struct ttm_buffer_object *bo,
 
 extern int ttm_bo_wait_cpu(struct ttm_buffer_object *bo, bool no_wait);
 
-/**
- * ttm_bo_pci_offset - Get the PCI offset for the buffer object memory.
- *
- * @bo Pointer to a struct ttm_buffer_object.
- * @bus_base On return the base of the PCI region
- * @bus_offset On return the byte offset into the PCI region
- * @bus_size On return the byte size of the buffer object or zero if
- * the buffer object memory is not accessible through a PCI region.
- *
- * Returns:
- * -EINVAL if the buffer object is currently not mappable.
- * 0 otherwise.
- */
-
-extern int ttm_bo_pci_offset(struct ttm_bo_device *bdev,
-			     struct ttm_mem_reg *mem,
-			     unsigned long *bus_base,
-			     unsigned long *bus_offset,
-			     unsigned long *bus_size);
-
-extern int ttm_mem_io_reserve(struct ttm_bo_device *bdev,
-				struct ttm_mem_reg *mem);
-extern void ttm_mem_io_free(struct ttm_bo_device *bdev,
-				struct ttm_mem_reg *mem);
-
 extern void ttm_bo_global_release(struct drm_global_reference *ref);
 extern int ttm_bo_global_init(struct drm_global_reference *ref);
 
@@ -809,6 +804,22 @@ extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
  */
 extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo);
 
+/**
+ * ttm_bo_unmap_virtual
+ *
+ * @bo: tear down the virtual mappings for this BO
+ *
+ * The caller must take ttm_mem_io_lock before calling this function.
+ */
+extern void ttm_bo_unmap_virtual_locked(struct ttm_buffer_object *bo);
+
+extern int ttm_mem_io_reserve_vm(struct ttm_buffer_object *bo);
+extern void ttm_mem_io_free_vm(struct ttm_buffer_object *bo);
+extern int ttm_mem_io_lock(struct ttm_mem_type_manager *man,
+			   bool interruptible);
+extern void ttm_mem_io_unlock(struct ttm_mem_type_manager *man);
+
+
 /**
  * ttm_bo_reserve:
  *
@@ -859,11 +870,44 @@ extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo);
  * try again. (only if use_sequence == 1).
  * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
  * a signal. Release all buffer reservations and return to user-space.
+ * -EBUSY: The function needed to sleep, but @no_wait was true
+ * -EDEADLK: Bo already reserved using @sequence. This error code will only
+ * be returned if @use_sequence is set to true.
  */
 extern int ttm_bo_reserve(struct ttm_buffer_object *bo,
 			  bool interruptible,
 			  bool no_wait, bool use_sequence, uint32_t sequence);
 
+
+/**
+ * ttm_bo_reserve_locked:
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ * @interruptible: Sleep interruptible if waiting.
+ * @no_wait: Don't sleep while trying to reserve, rather return -EBUSY.
+ * @use_sequence: If @bo is already reserved, Only sleep waiting for
+ * it to become unreserved if @sequence < (@bo)->sequence.
+ *
+ * Must be called with struct ttm_bo_global::lru_lock held,
+ * and will not remove reserved buffers from the lru lists.
+ * The function may release the LRU spinlock if it needs to sleep.
+ * Otherwise identical to ttm_bo_reserve.
+ *
+ * Returns:
+ * -EAGAIN: The reservation may cause a deadlock.
+ * Release all buffer reservations, wait for @bo to become unreserved and
+ * try again. (only if use_sequence == 1).
+ * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ * -EBUSY: The function needed to sleep, but @no_wait was true
+ * -EDEADLK: Bo already reserved using @sequence. This error code will only
+ * be returned if @use_sequence is set to true.
+ */
+extern int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
+				 bool interruptible,
+				 bool no_wait, bool use_sequence,
+				 uint32_t sequence);
+
 /**
  * ttm_bo_unreserve
  *
@@ -873,6 +917,16 @@ extern int ttm_bo_reserve(struct ttm_buffer_object *bo,
  */
 extern void ttm_bo_unreserve(struct ttm_buffer_object *bo);
 
+/**
+ * ttm_bo_unreserve_locked
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ *
+ * Unreserve a previous reservation of @bo.
+ * Needs to be called with struct ttm_bo_global::lru_lock held.
+ */
+extern void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo);
+
 /**
  * ttm_bo_wait_unreserved
  *

+ 8 - 3
include/drm/ttm/ttm_execbuf_util.h

@@ -41,7 +41,10 @@
  * @bo:             refcounted buffer object pointer.
  * @new_sync_obj_arg: New sync_obj_arg for @bo, to be used once
  * adding a new sync object.
- * @reservied:      Indicates whether @bo has been reserved for validation.
+ * @reserved:       Indicates whether @bo has been reserved for validation.
+ * @removed:        Indicates whether @bo has been removed from lru lists.
+ * @put_count:      Number of outstanding references on bo::list_kref.
+ * @old_sync_obj:   Pointer to a sync object about to be unreferenced
  */
 
 struct ttm_validate_buffer {
@@ -49,6 +52,9 @@ struct ttm_validate_buffer {
 	struct ttm_buffer_object *bo;
 	void *new_sync_obj_arg;
 	bool reserved;
+	bool removed;
+	int put_count;
+	void *old_sync_obj;
 };
 
 /**
@@ -66,7 +72,6 @@ extern void ttm_eu_backoff_reservation(struct list_head *list);
  * function ttm_eu_reserve_buffers
  *
  * @list:    thread private list of ttm_validate_buffer structs.
- * @val_seq: A unique sequence number.
  *
  * Tries to reserve bos pointed to by the list entries for validation.
  * If the function returns 0, all buffers are marked as "unfenced",
@@ -88,7 +93,7 @@ extern void ttm_eu_backoff_reservation(struct list_head *list);
  * has failed.
  */
 
-extern int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq);
+extern int ttm_eu_reserve_buffers(struct list_head *list);
 
 /**
  * function ttm_eu_fence_buffer_objects.
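
For reference, a hedged sketch of how a driver populates one of these validation entries, mirroring the radeon conversion above; struct my_bo and my_add_to_validate_list are hypothetical names, while the tv.bo, tv.head and new_sync_obj_arg fields come from this header.

#include <ttm/ttm_execbuf_util.h>

/* Hypothetical driver buffer wrapping a TTM buffer object. */
struct my_bo {
	struct ttm_buffer_object tbo;
	struct ttm_validate_buffer tv;
};

static void my_add_to_validate_list(struct my_bo *bo, struct list_head *head)
{
	bo->tv.bo = &bo->tbo;
	bo->tv.new_sync_obj_arg = NULL;	/* driver-specific fence argument */
	list_add_tail(&bo->tv.head, head);
}
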

+ 2 - 0
include/linux/kref.h

@@ -24,5 +24,7 @@ struct kref {
 void kref_init(struct kref *kref);
 void kref_get(struct kref *kref);
 int kref_put(struct kref *kref, void (*release) (struct kref *kref));
+int kref_sub(struct kref *kref, unsigned int count,
+	     void (*release) (struct kref *kref));
 
 #endif /* _KREF_H_ */

+ 30 - 0
lib/kref.c

@@ -62,6 +62,36 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref))
 	return 0;
 }
 
+
+/**
+ * kref_sub - subtract a number of refcounts for object.
+ * @kref: object.
+ * @count: Number of recounts to subtract.
+ * @release: pointer to the function that will clean up the object when the
+ *	     last reference to the object is released.
+ *	     This pointer is required, and it is not acceptable to pass kfree
+ *	     in as this function.
+ *
+ * Subtract @count from the refcount, and if 0, call release().
+ * Return 1 if the object was removed, otherwise return 0.  Beware, if this
+ * function returns 0, you still can not count on the kref from remaining in
+ * memory.  Only use the return value if you want to see if the kref is now
+ * gone, not present.
+ */
+int kref_sub(struct kref *kref, unsigned int count,
+	     void (*release)(struct kref *kref))
+{
+	WARN_ON(release == NULL);
+	WARN_ON(release == (void (*)(struct kref *))kfree);
+
+	if (atomic_sub_and_test((int) count, &kref->refcount)) {
+		release(kref);
+		return 1;
+	}
+	return 0;
+}
+
 EXPORT_SYMBOL(kref_init);
 EXPORT_SYMBOL(kref_get);
 EXPORT_SYMBOL(kref_put);
+EXPORT_SYMBOL(kref_sub);
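
As a usage note, kref_sub lets a caller drop several references with a single atomic operation instead of looping kref_put, which is what the new ttm_bo_list_ref_sub helper above does with bo::list_kref. A minimal sketch with a hypothetical object and release callback:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct my_obj {
	struct kref kref;
	/* ... payload ... */
};

static void my_obj_release(struct kref *kref)
{
	kfree(container_of(kref, struct my_obj, kref));
}

/* Drop "count" references at once; equivalent to calling
 * kref_put(&obj->kref, my_obj_release) count times. */
static void my_obj_put_many(struct my_obj *obj, unsigned int count)
{
	kref_sub(&obj->kref, count, my_obj_release);
}
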