12 years ago · 1f83fee08d
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1672,7 +1672,7 @@ i915_wedged_read(struct file *filp,
 
				 
			
 
				 	len = snprintf(buf, sizeof(buf),
			
 
				 		       "wedged :  %d\n",
			
 
				-		       atomic_read(&dev_priv->gpu_error.wedged));
			
 
				+		       atomic_read(&dev_priv->gpu_error.reset_counter));
			
 
				 
			
 
				 	if (len > sizeof(buf))
			
 
				 		len = sizeof(buf);
			
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -771,11 +771,37 @@ struct i915_gpu_error {
 
				 	/* Protected by the above dev->gpu_error.lock. */
			
 
				 	struct drm_i915_error_state *first_error;
			
 
				 	struct work_struct work;
			
 
				-	struct completion completion;
			
 
				 
			
 
				 	unsigned long last_reset;
			
 
				 
			
 
				-	atomic_t wedged;
			
 
				+	/**
			
 
				+	 * State variable controlling the reset flow
			
 
				+	 *
			
 
				+	 * Upper bits are for the reset counter.
			
 
				+	 *
			
 
				+	 * Lowest bit controls the reset state machine: Set means a reset is in
			
 
				+	 * progress. This state will (presuming we don't have any bugs) decay
			
 
				+	 * into either unset (successful reset) or the special WEDGED value (hw
			
 
				+	 * terminally sour). All waiters on the reset_queue will be woken when
			
 
				+	 * that happens.
			
 
				+	 */
			
 
				+	atomic_t reset_counter;
			
 
				+
			
 
				+	/**
			
 
				+	 * Special values/flags for reset_counter
			
 
				+	 *
			
 
				+	 * Note that the code relies on
			
 
				+	 * 	I915_WEDGED & I915_RESET_IN_PROGRESS_FLAG
			
 
				+	 * being true.
			
 
				+	 */
			
 
				+#define I915_RESET_IN_PROGRESS_FLAG	1
			
 
				+#define I915_WEDGED			0xffffffff
			
 
				+
			
 
				+	/**
			
 
				+	 * Waitqueue to signal when the reset has completed. Used by clients
			
 
				+	 * that wait for dev_priv->gpu_error.reset_counter to settle.
			
 
				+	 */
			
 
				+	wait_queue_head_t reset_queue;
			
 
				 
			
 
				 	/* For gpu hang simulation. */
			
 
				 	unsigned int stop_rings;
			
@@ -1543,6 +1569,16 @@ void i915_gem_retire_requests(struct drm_device *dev);
 
				 void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
			
 
				 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
			
 
				 				      bool interruptible);
			
 
				+static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
			
 
				+{
			
 
				+	return unlikely(atomic_read(&error->reset_counter)
			
 
				+			& I915_RESET_IN_PROGRESS_FLAG);
			
 
				+}
			
 
				+
			
 
				+static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
			
 
				+{
			
 
				+	return atomic_read(&error->reset_counter) == I915_WEDGED;
			
 
				+}
			
 
				 
			
 
				 void i915_gem_reset(struct drm_device *dev);
			
 
				 void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
			
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -89,36 +89,32 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 
				 static int
			
 
				 i915_gem_wait_for_error(struct i915_gpu_error *error)
			
 
				 {
			
 
				-	struct completion *x = &error->completion;
			
 
				-	unsigned long flags;
			
 
				 	int ret;
			
 
				 
			
 
				-	if (!atomic_read(&error->wedged))
			
 
				+#define EXIT_COND (!i915_reset_in_progress(error))
			
 
				+	if (EXIT_COND)
			
 
				 		return 0;
			
 
				 
			
 
				+	/* GPU is already declared terminally dead, give up. */
			
 
				+	if (i915_terminally_wedged(error))
			
 
				+		return -EIO;
			
 
				+
			
 
				 	/*
			
 
				 	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
			
 
				 	 * userspace. If it takes that long something really bad is going on and
			
 
				 	 * we should simply try to bail out and fail as gracefully as possible.
			
 
				 	 */
			
 
				-	ret = wait_for_completion_interruptible_timeout(x, 10*HZ);
			
 
				+	ret = wait_event_interruptible_timeout(error->reset_queue,
			
 
				+					       EXIT_COND,
			
 
				+					       10*HZ);
			
 
				 	if (ret == 0) {
			
 
				 		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
			
 
				 		return -EIO;
			
 
				 	} else if (ret < 0) {
			
 
				 		return ret;
			
 
				 	}
			
 
				+#undef EXIT_COND
			
 
				 
			
 
				-	if (atomic_read(&error->wedged)) {
			
 
				-		/* GPU is hung, bump the completion count to account for
			
 
				-		 * the token we just consumed so that we never hit zero and
			
 
				-		 * end up waiting upon a subsequent completion event that
			
 
				-		 * will never happen.
			
 
				-		 */
			
 
				-		spin_lock_irqsave(&x->wait.lock, flags);
			
 
				-		x->done++;
			
 
				-		spin_unlock_irqrestore(&x->wait.lock, flags);
			
 
				-	}
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -942,23 +938,14 @@ int
 
				 i915_gem_check_wedge(struct i915_gpu_error *error,
			
 
				 		     bool interruptible)
			
 
				 {
			
 
				-	if (atomic_read(&error->wedged)) {
			
 
				-		struct completion *x = &error->completion;
			
 
				-		bool recovery_complete;
			
 
				-		unsigned long flags;
			
 
				-
			
 
				-		/* Give the error handler a chance to run. */
			
 
				-		spin_lock_irqsave(&x->wait.lock, flags);
			
 
				-		recovery_complete = x->done > 0;
			
 
				-		spin_unlock_irqrestore(&x->wait.lock, flags);
			
 
				-
			
 
				+	if (i915_reset_in_progress(error)) {
			
 
				 		/* Non-interruptible callers can't handle -EAGAIN, hence return
			
 
				 		 * -EIO unconditionally for these. */
			
 
				 		if (!interruptible)
			
 
				 			return -EIO;
			
 
				 
			
 
				-		/* Recovery complete, but still wedged means reset failure. */
			
 
				-		if (recovery_complete)
			
 
				+		/* Recovery complete, but the reset failed ... */
			
 
				+		if (i915_terminally_wedged(error))
			
 
				 			return -EIO;
			
 
				 
			
 
				 		return -EAGAIN;
			
@@ -1025,7 +1012,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 
				 
			
 
				 #define EXIT_COND \
			
 
				 	(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
			
 
				-	atomic_read(&dev_priv->gpu_error.wedged))
			
 
				+	i915_reset_in_progress(&dev_priv->gpu_error))
			
 
				 	do {
			
 
				 		if (interruptible)
			
 
				 			end = wait_event_interruptible_timeout(ring->irq_queue,
			
@@ -1379,7 +1366,7 @@ out:
 
				 		/* If this -EIO is due to a gpu hang, give the reset code a
			
 
				 		 * chance to clean up the mess. Otherwise return the proper
			
 
				 		 * SIGBUS. */
			
 
				-		if (!atomic_read(&dev_priv->gpu_error.wedged))
			
 
				+		if (i915_terminally_wedged(&dev_priv->gpu_error))
			
 
				 			return VM_FAULT_SIGBUS;
			
 
				 	case -EAGAIN:
			
 
				 		/* Give the error handler a chance to run and move the
			
@@ -3983,9 +3970,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 
				 	if (drm_core_check_feature(dev, DRIVER_MODESET))
			
 
				 		return 0;
			
 
				 
			
 
				-	if (atomic_read(&dev_priv->gpu_error.wedged)) {
			
 
				+	if (i915_reset_in_progress(&dev_priv->gpu_error)) {
			
 
				 		DRM_ERROR("Reenabling wedged hardware, good luck\n");
			
 
				-		atomic_set(&dev_priv->gpu_error.wedged, 0);
			
 
				+		atomic_set(&dev_priv->gpu_error.reset_counter, 0);
			
 
				 	}
			
 
				 
			
 
				 	mutex_lock(&dev->struct_mutex);
			
@@ -4069,7 +4056,7 @@ i915_gem_load(struct drm_device *dev)
 
				 		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
			
 
				 	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			
 
				 			  i915_gem_retire_work_handler);
			
 
				-	init_completion(&dev_priv->gpu_error.completion);
			
 
				+	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
			
 
				 
			
 
				 	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
			
 
				 	if (IS_GEN3(dev)) {
			
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -862,8 +862,10 @@ done:
 
				  */
			
 
				 static void i915_error_work_func(struct work_struct *work)
			
 
				 {
			
 
				-	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
			
 
				-						    gpu_error.work);
			
 
				+	struct i915_gpu_error *error = container_of(work, struct i915_gpu_error,
			
 
				+						    work);
			
 
				+	drm_i915_private_t *dev_priv = container_of(error, drm_i915_private_t,
			
 
				+						    gpu_error);
			
 
				 	struct drm_device *dev = dev_priv->dev;
			
 
				 	char *error_event[] = { "ERROR=1", NULL };
			
 
				 	char *reset_event[] = { "RESET=1", NULL };
			
@@ -871,14 +873,18 @@ static void i915_error_work_func(struct work_struct *work)
 
				 
			
 
				 	kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);
			
 
				 
			
 
				-	if (atomic_read(&dev_priv->gpu_error.wedged)) {
			
 
				+	if (i915_reset_in_progress(error)) {
			
 
				 		DRM_DEBUG_DRIVER("resetting chip\n");
			
 
				 		kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event);
			
 
				+
			
 
				 		if (!i915_reset(dev)) {
			
 
				-			atomic_set(&dev_priv->gpu_error.wedged, 0);
			
 
				+			atomic_set(&error->reset_counter, 0);
			
 
				 			kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event);
			
 
				+		} else {
			
 
				+			atomic_set(&error->reset_counter, I915_WEDGED);
			
 
				 		}
			
 
				-		complete_all(&dev_priv->gpu_error.completion);
			
 
				+
			
 
				+		wake_up_all(&dev_priv->gpu_error.reset_queue);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -1482,11 +1488,12 @@ void i915_handle_error(struct drm_device *dev, bool wedged)
 
				 	i915_report_and_clear_eir(dev);
			
 
				 
			
 
				 	if (wedged) {
			
 
				-		INIT_COMPLETION(dev_priv->gpu_error.completion);
			
 
				-		atomic_set(&dev_priv->gpu_error.wedged, 1);
			
 
				+		atomic_set(&dev_priv->gpu_error.reset_counter,
			
 
				+			   I915_RESET_IN_PROGRESS_FLAG);
			
 
				 
			
 
				 		/*
			
 
				-		 * Wakeup waiting processes so they don't hang
			
 
				+		 * Wakeup waiting processes so that the reset work item
			
 
				+		 * doesn't deadlock trying to grab various locks.
			
 
				 		 */
			
 
				 		for_each_ring(ring, dev_priv, i)
			
 
				 			wake_up_all(&ring->irq_queue);
			
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2223,7 +2223,7 @@ intel_finish_fb(struct drm_framebuffer *old_fb)
 
				 	WARN_ON(waitqueue_active(&dev_priv->pending_flip_queue));
			
 
				 
			
 
				 	wait_event(dev_priv->pending_flip_queue,
			
 
				-		   atomic_read(&dev_priv->gpu_error.wedged) ||
			
 
				+		   i915_reset_in_progress(&dev_priv->gpu_error) ||
			
 
				 		   atomic_read(&obj->pending_flip) == 0);
			
 
				 
			
 
				 	/* Big Hammer, we also need to ensure that any pending
			
@@ -2871,7 +2871,7 @@ static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
 
				 	unsigned long flags;
			
 
				 	bool pending;
			
 
				 
			
 
				-	if (atomic_read(&dev_priv->gpu_error.wedged))
			
 
				+	if (i915_reset_in_progress(&dev_priv->gpu_error))
			
 
				 		return false;
			
 
				 
			
 
				 	spin_lock_irqsave(&dev->event_lock, flags);