radeon_fence.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. /*
  2. * Copyright 2009 Jerome Glisse.
  3. * All Rights Reserved.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the
  7. * "Software"), to deal in the Software without restriction, including
  8. * without limitation the rights to use, copy, modify, merge, publish,
  9. * distribute, sub license, and/or sell copies of the Software, and to
  10. * permit persons to whom the Software is furnished to do so, subject to
  11. * the following conditions:
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16. * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17. * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18. * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19. * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20. *
  21. * The above copyright notice and this permission notice (including the
  22. * next paragraph) shall be included in all copies or substantial portions
  23. * of the Software.
  24. *
  25. */
  26. /*
  27. * Authors:
  28. * Jerome Glisse <glisse@freedesktop.org>
  29. * Dave Airlie
  30. */
  31. #include <linux/seq_file.h>
  32. #include <linux/atomic.h>
  33. #include <linux/wait.h>
  34. #include <linux/list.h>
  35. #include <linux/kref.h>
  36. #include <linux/slab.h>
  37. #include "drmP.h"
  38. #include "drm.h"
  39. #include "radeon_reg.h"
  40. #include "radeon.h"
  41. #include "radeon_trace.h"
  42. static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
  43. {
  44. if (rdev->wb.enabled) {
  45. *rdev->fence_drv[ring].cpu_addr = cpu_to_le32(seq);
  46. } else {
  47. WREG32(rdev->fence_drv[ring].scratch_reg, seq);
  48. }
  49. }
  50. static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
  51. {
  52. u32 seq = 0;
  53. if (rdev->wb.enabled) {
  54. seq = le32_to_cpu(*rdev->fence_drv[ring].cpu_addr);
  55. } else {
  56. seq = RREG32(rdev->fence_drv[ring].scratch_reg);
  57. }
  58. return seq;
  59. }
  60. int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
  61. {
  62. /* we are protected by the ring emission mutex */
  63. if (fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
  64. return 0;
  65. }
  66. fence->seq = ++rdev->fence_drv[fence->ring].seq;
  67. radeon_fence_ring_emit(rdev, fence->ring, fence);
  68. trace_radeon_fence_emit(rdev->ddev, fence->seq);
  69. return 0;
  70. }
  71. void radeon_fence_process(struct radeon_device *rdev, int ring)
  72. {
  73. uint64_t seq, last_seq;
  74. unsigned count_loop = 0;
  75. bool wake = false;
  76. /* Note there is a scenario here for an infinite loop but it's
  77. * very unlikely to happen. For it to happen, the current polling
  78. * process need to be interrupted by another process and another
  79. * process needs to update the last_seq btw the atomic read and
  80. * xchg of the current process.
  81. *
  82. * More over for this to go in infinite loop there need to be
  83. * continuously new fence signaled ie radeon_fence_read needs
  84. * to return a different value each time for both the currently
  85. * polling process and the other process that xchg the last_seq
  86. * btw atomic read and xchg of the current process. And the
  87. * value the other process set as last seq must be higher than
  88. * the seq value we just read. Which means that current process
  89. * need to be interrupted after radeon_fence_read and before
  90. * atomic xchg.
  91. *
  92. * To be even more safe we count the number of time we loop and
  93. * we bail after 10 loop just accepting the fact that we might
  94. * have temporarly set the last_seq not to the true real last
  95. * seq but to an older one.
  96. */
  97. last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
  98. do {
  99. seq = radeon_fence_read(rdev, ring);
  100. seq |= last_seq & 0xffffffff00000000LL;
  101. if (seq < last_seq) {
  102. seq += 0x100000000LL;
  103. }
  104. if (seq == last_seq) {
  105. break;
  106. }
  107. /* If we loop over we don't want to return without
  108. * checking if a fence is signaled as it means that the
  109. * seq we just read is different from the previous on.
  110. */
  111. wake = true;
  112. last_seq = seq;
  113. if ((count_loop++) > 10) {
  114. /* We looped over too many time leave with the
  115. * fact that we might have set an older fence
  116. * seq then the current real last seq as signaled
  117. * by the hw.
  118. */
  119. break;
  120. }
  121. } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
  122. if (wake) {
  123. rdev->fence_drv[ring].last_activity = jiffies;
  124. wake_up_all(&rdev->fence_drv[ring].queue);
  125. }
  126. }
  127. static void radeon_fence_destroy(struct kref *kref)
  128. {
  129. struct radeon_fence *fence;
  130. fence = container_of(kref, struct radeon_fence, kref);
  131. fence->seq = RADEON_FENCE_NOTEMITED_SEQ;
  132. if (fence->semaphore)
  133. radeon_semaphore_free(fence->rdev, fence->semaphore);
  134. kfree(fence);
  135. }
  136. int radeon_fence_create(struct radeon_device *rdev,
  137. struct radeon_fence **fence,
  138. int ring)
  139. {
  140. *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
  141. if ((*fence) == NULL) {
  142. return -ENOMEM;
  143. }
  144. kref_init(&((*fence)->kref));
  145. (*fence)->rdev = rdev;
  146. (*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ;
  147. (*fence)->ring = ring;
  148. (*fence)->semaphore = NULL;
  149. return 0;
  150. }
  151. static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
  152. u64 seq, unsigned ring)
  153. {
  154. if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  155. return true;
  156. }
  157. /* poll new last sequence at least once */
  158. radeon_fence_process(rdev, ring);
  159. if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  160. return true;
  161. }
  162. return false;
  163. }
  164. bool radeon_fence_signaled(struct radeon_fence *fence)
  165. {
  166. if (!fence) {
  167. return true;
  168. }
  169. if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) {
  170. WARN(1, "Querying an unemitted fence : %p !\n", fence);
  171. return true;
  172. }
  173. if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
  174. return true;
  175. }
  176. if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
  177. fence->seq = RADEON_FENCE_SIGNALED_SEQ;
  178. return true;
  179. }
  180. return false;
  181. }
  182. static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
  183. unsigned ring, bool intr)
  184. {
  185. unsigned long timeout, last_activity;
  186. uint64_t seq;
  187. unsigned i;
  188. bool signaled;
  189. int r;
  190. while (target_seq > atomic64_read(&rdev->fence_drv[ring].last_seq)) {
  191. if (!rdev->ring[ring].ready) {
  192. return -EBUSY;
  193. }
  194. timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
  195. if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
  196. /* the normal case, timeout is somewhere before last_activity */
  197. timeout = rdev->fence_drv[ring].last_activity - timeout;
  198. } else {
  199. /* either jiffies wrapped around, or no fence was signaled in the last 500ms
  200. * anyway we will just wait for the minimum amount and then check for a lockup
  201. */
  202. timeout = 1;
  203. }
  204. seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
  205. /* Save current last activity valuee, used to check for GPU lockups */
  206. last_activity = rdev->fence_drv[ring].last_activity;
  207. trace_radeon_fence_wait_begin(rdev->ddev, seq);
  208. radeon_irq_kms_sw_irq_get(rdev, ring);
  209. if (intr) {
  210. r = wait_event_interruptible_timeout(rdev->fence_drv[ring].queue,
  211. (signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
  212. timeout);
  213. } else {
  214. r = wait_event_timeout(rdev->fence_drv[ring].queue,
  215. (signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
  216. timeout);
  217. }
  218. radeon_irq_kms_sw_irq_put(rdev, ring);
  219. if (unlikely(r < 0)) {
  220. return r;
  221. }
  222. trace_radeon_fence_wait_end(rdev->ddev, seq);
  223. if (unlikely(!signaled)) {
  224. /* we were interrupted for some reason and fence
  225. * isn't signaled yet, resume waiting */
  226. if (r) {
  227. continue;
  228. }
  229. /* check if sequence value has changed since last_activity */
  230. if (seq != atomic64_read(&rdev->fence_drv[ring].last_seq)) {
  231. continue;
  232. }
  233. /* test if somebody else has already decided that this is a lockup */
  234. if (last_activity != rdev->fence_drv[ring].last_activity) {
  235. continue;
  236. }
  237. if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
  238. /* good news we believe it's a lockup */
  239. dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
  240. target_seq, seq);
  241. /* change last activity so nobody else think there is a lockup */
  242. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  243. rdev->fence_drv[i].last_activity = jiffies;
  244. }
  245. /* change last activity so nobody else think there is a lockup */
  246. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  247. rdev->fence_drv[i].last_activity = jiffies;
  248. }
  249. /* mark the ring as not ready any more */
  250. rdev->ring[ring].ready = false;
  251. return -EDEADLK;
  252. }
  253. }
  254. }
  255. return 0;
  256. }
  257. int radeon_fence_wait(struct radeon_fence *fence, bool intr)
  258. {
  259. int r;
  260. if (fence == NULL) {
  261. WARN(1, "Querying an invalid fence : %p !\n", fence);
  262. return -EINVAL;
  263. }
  264. r = radeon_fence_wait_seq(fence->rdev, fence->seq, fence->ring, intr);
  265. if (r) {
  266. return r;
  267. }
  268. fence->seq = RADEON_FENCE_SIGNALED_SEQ;
  269. return 0;
  270. }
  271. int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
  272. {
  273. uint64_t seq;
  274. /* We are not protected by ring lock when reading current seq but
  275. * it's ok as worst case is we return to early while we could have
  276. * wait.
  277. */
  278. seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
  279. if (seq >= rdev->fence_drv[ring].seq) {
  280. /* nothing to wait for, last_seq is already the last emited fence */
  281. return 0;
  282. }
  283. return radeon_fence_wait_seq(rdev, seq, ring, false);
  284. }
  285. int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
  286. {
  287. /* We are not protected by ring lock when reading current seq
  288. * but it's ok as wait empty is call from place where no more
  289. * activity can be scheduled so there won't be concurrent access
  290. * to seq value.
  291. */
  292. return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].seq, ring, false);
  293. }
  294. struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
  295. {
  296. kref_get(&fence->kref);
  297. return fence;
  298. }
  299. void radeon_fence_unref(struct radeon_fence **fence)
  300. {
  301. struct radeon_fence *tmp = *fence;
  302. *fence = NULL;
  303. if (tmp) {
  304. kref_put(&tmp->kref, radeon_fence_destroy);
  305. }
  306. }
  307. unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
  308. {
  309. uint64_t emitted;
  310. radeon_fence_process(rdev, ring);
  311. /* We are not protected by ring lock when reading the last sequence
  312. * but it's ok to report slightly wrong fence count here.
  313. */
  314. emitted = rdev->fence_drv[ring].seq - atomic64_read(&rdev->fence_drv[ring].last_seq);
  315. /* to avoid 32bits warp around */
  316. if (emitted > 0x10000000) {
  317. emitted = 0x10000000;
  318. }
  319. return (unsigned)emitted;
  320. }
  321. int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
  322. {
  323. uint64_t index;
  324. int r;
  325. radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
  326. if (rdev->wb.use_event) {
  327. rdev->fence_drv[ring].scratch_reg = 0;
  328. index = R600_WB_EVENT_OFFSET + ring * 4;
  329. } else {
  330. r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
  331. if (r) {
  332. dev_err(rdev->dev, "fence failed to get scratch register\n");
  333. return r;
  334. }
  335. index = RADEON_WB_SCRATCH_OFFSET +
  336. rdev->fence_drv[ring].scratch_reg -
  337. rdev->scratch.reg_base;
  338. }
  339. rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
  340. rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
  341. radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring);
  342. rdev->fence_drv[ring].initialized = true;
  343. dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
  344. ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
  345. return 0;
  346. }
  347. static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
  348. {
  349. rdev->fence_drv[ring].scratch_reg = -1;
  350. rdev->fence_drv[ring].cpu_addr = NULL;
  351. rdev->fence_drv[ring].gpu_addr = 0;
  352. rdev->fence_drv[ring].seq = 0;
  353. atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
  354. rdev->fence_drv[ring].last_activity = jiffies;
  355. init_waitqueue_head(&rdev->fence_drv[ring].queue);
  356. rdev->fence_drv[ring].initialized = false;
  357. }
  358. int radeon_fence_driver_init(struct radeon_device *rdev)
  359. {
  360. int ring;
  361. for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  362. radeon_fence_driver_init_ring(rdev, ring);
  363. }
  364. if (radeon_debugfs_fence_init(rdev)) {
  365. dev_err(rdev->dev, "fence debugfs file creation failed\n");
  366. }
  367. return 0;
  368. }
  369. void radeon_fence_driver_fini(struct radeon_device *rdev)
  370. {
  371. int ring;
  372. for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  373. if (!rdev->fence_drv[ring].initialized)
  374. continue;
  375. radeon_fence_wait_empty(rdev, ring);
  376. wake_up_all(&rdev->fence_drv[ring].queue);
  377. radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
  378. rdev->fence_drv[ring].initialized = false;
  379. }
  380. }
  381. /*
  382. * Fence debugfs
  383. */
  #if defined(CONFIG_DEBUG_FS)
  /*
   * debugfs show callback: for every initialized ring, print the last
   * signaled and the last emitted fence sequence number.
   *
   * @m:    seq_file to print into; m->private is the drm_info_node
   * @data: unused
   *
   * Always returns 0.
   */
  static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
  {
      struct drm_info_node *node = (struct drm_info_node *)m->private;
      struct drm_device *dev = node->minor->dev;
      struct radeon_device *rdev = dev->dev_private;
      int i;

      for (i = 0; i < RADEON_NUM_RINGS; ++i) {
          if (!rdev->fence_drv[i].initialized)
              continue;

          seq_printf(m, "--- ring %d ---\n", i);
          /*
           * The type returned by atomic64_read() is not the same on every
           * architecture, so cast explicitly and print with %llx instead
           * of relying on it matching %lx.
           */
          seq_printf(m, "Last signaled fence 0x%016llx\n",
                     (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
          seq_printf(m, "Last emitted 0x%016llx\n",
                     rdev->fence_drv[i].seq);
      }
      return 0;
  }

  /* debugfs entries registered by radeon_debugfs_fence_init() */
  static struct drm_info_list radeon_debugfs_fence_list[] = {
      {"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
  };
  #endif
  /*
   * Register the fence debugfs file for @rdev.
   *
   * With CONFIG_DEBUG_FS this returns the result of
   * radeon_debugfs_add_files() for the single entry in
   * radeon_debugfs_fence_list; without it this is a no-op returning 0.
   */
  int radeon_debugfs_fence_init(struct radeon_device *rdev)
  {
  #ifdef CONFIG_DEBUG_FS
      return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 1);
  #else
      return 0;
  #endif
  }