radeon_fence.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931
  1. /*
  2. * Copyright 2009 Jerome Glisse.
  3. * All Rights Reserved.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the
  7. * "Software"), to deal in the Software without restriction, including
  8. * without limitation the rights to use, copy, modify, merge, publish,
  9. * distribute, sub license, and/or sell copies of the Software, and to
  10. * permit persons to whom the Software is furnished to do so, subject to
  11. * the following conditions:
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16. * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17. * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18. * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19. * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20. *
  21. * The above copyright notice and this permission notice (including the
  22. * next paragraph) shall be included in all copies or substantial portions
  23. * of the Software.
  24. *
  25. */
  26. /*
  27. * Authors:
  28. * Jerome Glisse <glisse@freedesktop.org>
  29. * Dave Airlie
  30. */
  31. #include <linux/seq_file.h>
  32. #include <linux/atomic.h>
  33. #include <linux/wait.h>
  34. #include <linux/list.h>
  35. #include <linux/kref.h>
  36. #include <linux/slab.h>
  37. #include <drm/drmP.h>
  38. #include "radeon_reg.h"
  39. #include "radeon.h"
  40. #include "radeon_trace.h"
  41. /*
  42. * Fences
  43. * Fences mark an event in the GPU's pipeline and are used
  44. * for GPU/CPU synchronization. When the fence is written,
  45. * it is expected that all buffers associated with that fence
  46. * are no longer in use by the associated ring on the GPU and
  47. * that the relevant GPU caches have been flushed. Whether
  48. * we use a scratch register or memory location depends on the asic
  49. * and whether writeback is enabled.
  50. */
  51. /**
  52. * radeon_fence_write - write a fence value
  53. *
  54. * @rdev: radeon_device pointer
  55. * @seq: sequence number to write
  56. * @ring: ring index the fence is associated with
  57. *
  58. * Writes a fence value to memory or a scratch register (all asics).
  59. */
  60. static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
  61. {
  62. struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  63. if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  64. *drv->cpu_addr = cpu_to_le32(seq);
  65. } else {
  66. WREG32(drv->scratch_reg, seq);
  67. }
  68. }
  69. /**
  70. * radeon_fence_read - read a fence value
  71. *
  72. * @rdev: radeon_device pointer
  73. * @ring: ring index the fence is associated with
  74. *
  75. * Reads a fence value from memory or a scratch register (all asics).
  76. * Returns the value of the fence read from memory or register.
  77. */
  78. static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
  79. {
  80. struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  81. u32 seq = 0;
  82. if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  83. seq = le32_to_cpu(*drv->cpu_addr);
  84. } else {
  85. seq = RREG32(drv->scratch_reg);
  86. }
  87. return seq;
  88. }
  89. /**
  90. * radeon_fence_emit - emit a fence on the requested ring
  91. *
  92. * @rdev: radeon_device pointer
  93. * @fence: radeon fence object
  94. * @ring: ring index the fence is associated with
  95. *
  96. * Emits a fence command on the requested ring (all asics).
  97. * Returns 0 on success, -ENOMEM on failure.
  98. */
  99. int radeon_fence_emit(struct radeon_device *rdev,
  100. struct radeon_fence **fence,
  101. int ring)
  102. {
  103. /* we are protected by the ring emission mutex */
  104. *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
  105. if ((*fence) == NULL) {
  106. return -ENOMEM;
  107. }
  108. kref_init(&((*fence)->kref));
  109. (*fence)->rdev = rdev;
  110. (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
  111. (*fence)->ring = ring;
  112. radeon_fence_ring_emit(rdev, ring, *fence);
  113. trace_radeon_fence_emit(rdev->ddev, (*fence)->seq);
  114. return 0;
  115. }
  116. /**
  117. * radeon_fence_process - process a fence
  118. *
  119. * @rdev: radeon_device pointer
  120. * @ring: ring index the fence is associated with
  121. *
  122. * Checks the current fence value and wakes the fence queue
  123. * if the sequence number has increased (all asics).
  124. */
  125. void radeon_fence_process(struct radeon_device *rdev, int ring)
  126. {
  127. uint64_t seq, last_seq, last_emitted;
  128. unsigned count_loop = 0;
  129. bool wake = false;
  130. /* Note there is a scenario here for an infinite loop but it's
  131. * very unlikely to happen. For it to happen, the current polling
  132. * process need to be interrupted by another process and another
  133. * process needs to update the last_seq btw the atomic read and
  134. * xchg of the current process.
  135. *
  136. * More over for this to go in infinite loop there need to be
  137. * continuously new fence signaled ie radeon_fence_read needs
  138. * to return a different value each time for both the currently
  139. * polling process and the other process that xchg the last_seq
  140. * btw atomic read and xchg of the current process. And the
  141. * value the other process set as last seq must be higher than
  142. * the seq value we just read. Which means that current process
  143. * need to be interrupted after radeon_fence_read and before
  144. * atomic xchg.
  145. *
  146. * To be even more safe we count the number of time we loop and
  147. * we bail after 10 loop just accepting the fact that we might
  148. * have temporarly set the last_seq not to the true real last
  149. * seq but to an older one.
  150. */
  151. last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
  152. do {
  153. last_emitted = rdev->fence_drv[ring].sync_seq[ring];
  154. seq = radeon_fence_read(rdev, ring);
  155. seq |= last_seq & 0xffffffff00000000LL;
  156. if (seq < last_seq) {
  157. seq &= 0xffffffff;
  158. seq |= last_emitted & 0xffffffff00000000LL;
  159. }
  160. if (seq <= last_seq || seq > last_emitted) {
  161. break;
  162. }
  163. /* If we loop over we don't want to return without
  164. * checking if a fence is signaled as it means that the
  165. * seq we just read is different from the previous on.
  166. */
  167. wake = true;
  168. last_seq = seq;
  169. if ((count_loop++) > 10) {
  170. /* We looped over too many time leave with the
  171. * fact that we might have set an older fence
  172. * seq then the current real last seq as signaled
  173. * by the hw.
  174. */
  175. break;
  176. }
  177. } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
  178. if (wake) {
  179. rdev->fence_drv[ring].last_activity = jiffies;
  180. wake_up_all(&rdev->fence_queue);
  181. }
  182. }
  183. /**
  184. * radeon_fence_destroy - destroy a fence
  185. *
  186. * @kref: fence kref
  187. *
  188. * Frees the fence object (all asics).
  189. */
  190. static void radeon_fence_destroy(struct kref *kref)
  191. {
  192. struct radeon_fence *fence;
  193. fence = container_of(kref, struct radeon_fence, kref);
  194. kfree(fence);
  195. }
  196. /**
  197. * radeon_fence_seq_signaled - check if a fence sequeuce number has signaled
  198. *
  199. * @rdev: radeon device pointer
  200. * @seq: sequence number
  201. * @ring: ring index the fence is associated with
  202. *
  203. * Check if the last singled fence sequnce number is >= the requested
  204. * sequence number (all asics).
  205. * Returns true if the fence has signaled (current fence value
  206. * is >= requested value) or false if it has not (current fence
  207. * value is < the requested value. Helper function for
  208. * radeon_fence_signaled().
  209. */
  210. static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
  211. u64 seq, unsigned ring)
  212. {
  213. if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  214. return true;
  215. }
  216. /* poll new last sequence at least once */
  217. radeon_fence_process(rdev, ring);
  218. if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  219. return true;
  220. }
  221. return false;
  222. }
  223. /**
  224. * radeon_fence_signaled - check if a fence has signaled
  225. *
  226. * @fence: radeon fence object
  227. *
  228. * Check if the requested fence has signaled (all asics).
  229. * Returns true if the fence has signaled or false if it has not.
  230. */
  231. bool radeon_fence_signaled(struct radeon_fence *fence)
  232. {
  233. if (!fence) {
  234. return true;
  235. }
  236. if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
  237. return true;
  238. }
  239. if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
  240. fence->seq = RADEON_FENCE_SIGNALED_SEQ;
  241. return true;
  242. }
  243. return false;
  244. }
  245. /**
  246. * radeon_fence_wait_seq - wait for a specific sequence number
  247. *
  248. * @rdev: radeon device pointer
  249. * @target_seq: sequence number we want to wait for
  250. * @ring: ring index the fence is associated with
  251. * @intr: use interruptable sleep
  252. * @lock_ring: whether the ring should be locked or not
  253. *
  254. * Wait for the requested sequence number to be written (all asics).
  255. * @intr selects whether to use interruptable (true) or non-interruptable
  256. * (false) sleep when waiting for the sequence number. Helper function
  257. * for radeon_fence_wait(), et al.
  258. * Returns 0 if the sequence number has passed, error for all other cases.
  259. * -EDEADLK is returned when a GPU lockup has been detected and the ring is
  260. * marked as not ready so no further jobs get scheduled until a successful
  261. * reset.
  262. */
  263. static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
  264. unsigned ring, bool intr, bool lock_ring)
  265. {
  266. unsigned long timeout, last_activity;
  267. uint64_t seq;
  268. unsigned i;
  269. bool signaled;
  270. int r;
  271. while (target_seq > atomic64_read(&rdev->fence_drv[ring].last_seq)) {
  272. if (!rdev->ring[ring].ready) {
  273. return -EBUSY;
  274. }
  275. timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
  276. if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
  277. /* the normal case, timeout is somewhere before last_activity */
  278. timeout = rdev->fence_drv[ring].last_activity - timeout;
  279. } else {
  280. /* either jiffies wrapped around, or no fence was signaled in the last 500ms
  281. * anyway we will just wait for the minimum amount and then check for a lockup
  282. */
  283. timeout = 1;
  284. }
  285. seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
  286. /* Save current last activity valuee, used to check for GPU lockups */
  287. last_activity = rdev->fence_drv[ring].last_activity;
  288. trace_radeon_fence_wait_begin(rdev->ddev, seq);
  289. radeon_irq_kms_sw_irq_get(rdev, ring);
  290. if (intr) {
  291. r = wait_event_interruptible_timeout(rdev->fence_queue,
  292. (signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
  293. timeout);
  294. } else {
  295. r = wait_event_timeout(rdev->fence_queue,
  296. (signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
  297. timeout);
  298. }
  299. radeon_irq_kms_sw_irq_put(rdev, ring);
  300. if (unlikely(r < 0)) {
  301. return r;
  302. }
  303. trace_radeon_fence_wait_end(rdev->ddev, seq);
  304. if (unlikely(!signaled)) {
  305. /* we were interrupted for some reason and fence
  306. * isn't signaled yet, resume waiting */
  307. if (r) {
  308. continue;
  309. }
  310. /* check if sequence value has changed since last_activity */
  311. if (seq != atomic64_read(&rdev->fence_drv[ring].last_seq)) {
  312. continue;
  313. }
  314. if (lock_ring) {
  315. mutex_lock(&rdev->ring_lock);
  316. }
  317. /* test if somebody else has already decided that this is a lockup */
  318. if (last_activity != rdev->fence_drv[ring].last_activity) {
  319. if (lock_ring) {
  320. mutex_unlock(&rdev->ring_lock);
  321. }
  322. continue;
  323. }
  324. if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
  325. /* good news we believe it's a lockup */
  326. dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
  327. target_seq, seq);
  328. /* change last activity so nobody else think there is a lockup */
  329. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  330. rdev->fence_drv[i].last_activity = jiffies;
  331. }
  332. /* mark the ring as not ready any more */
  333. rdev->ring[ring].ready = false;
  334. if (lock_ring) {
  335. mutex_unlock(&rdev->ring_lock);
  336. }
  337. return -EDEADLK;
  338. }
  339. if (lock_ring) {
  340. mutex_unlock(&rdev->ring_lock);
  341. }
  342. }
  343. }
  344. return 0;
  345. }
  346. /**
  347. * radeon_fence_wait - wait for a fence to signal
  348. *
  349. * @fence: radeon fence object
  350. * @intr: use interruptable sleep
  351. *
  352. * Wait for the requested fence to signal (all asics).
  353. * @intr selects whether to use interruptable (true) or non-interruptable
  354. * (false) sleep when waiting for the fence.
  355. * Returns 0 if the fence has passed, error for all other cases.
  356. */
  357. int radeon_fence_wait(struct radeon_fence *fence, bool intr)
  358. {
  359. int r;
  360. if (fence == NULL) {
  361. WARN(1, "Querying an invalid fence : %p !\n", fence);
  362. return -EINVAL;
  363. }
  364. r = radeon_fence_wait_seq(fence->rdev, fence->seq,
  365. fence->ring, intr, true);
  366. if (r) {
  367. return r;
  368. }
  369. fence->seq = RADEON_FENCE_SIGNALED_SEQ;
  370. return 0;
  371. }
  372. static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
  373. {
  374. unsigned i;
  375. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  376. if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i)) {
  377. return true;
  378. }
  379. }
  380. return false;
  381. }
  382. /**
  383. * radeon_fence_wait_any_seq - wait for a sequence number on any ring
  384. *
  385. * @rdev: radeon device pointer
  386. * @target_seq: sequence number(s) we want to wait for
  387. * @intr: use interruptable sleep
  388. *
  389. * Wait for the requested sequence number(s) to be written by any ring
  390. * (all asics). Sequnce number array is indexed by ring id.
  391. * @intr selects whether to use interruptable (true) or non-interruptable
  392. * (false) sleep when waiting for the sequence number. Helper function
  393. * for radeon_fence_wait_any(), et al.
  394. * Returns 0 if the sequence number has passed, error for all other cases.
  395. */
  396. static int radeon_fence_wait_any_seq(struct radeon_device *rdev,
  397. u64 *target_seq, bool intr)
  398. {
  399. unsigned long timeout, last_activity, tmp;
  400. unsigned i, ring = RADEON_NUM_RINGS;
  401. bool signaled;
  402. int r;
  403. for (i = 0, last_activity = 0; i < RADEON_NUM_RINGS; ++i) {
  404. if (!target_seq[i]) {
  405. continue;
  406. }
  407. /* use the most recent one as indicator */
  408. if (time_after(rdev->fence_drv[i].last_activity, last_activity)) {
  409. last_activity = rdev->fence_drv[i].last_activity;
  410. }
  411. /* For lockup detection just pick the lowest ring we are
  412. * actively waiting for
  413. */
  414. if (i < ring) {
  415. ring = i;
  416. }
  417. }
  418. /* nothing to wait for ? */
  419. if (ring == RADEON_NUM_RINGS) {
  420. return -ENOENT;
  421. }
  422. while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
  423. timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
  424. if (time_after(last_activity, timeout)) {
  425. /* the normal case, timeout is somewhere before last_activity */
  426. timeout = last_activity - timeout;
  427. } else {
  428. /* either jiffies wrapped around, or no fence was signaled in the last 500ms
  429. * anyway we will just wait for the minimum amount and then check for a lockup
  430. */
  431. timeout = 1;
  432. }
  433. trace_radeon_fence_wait_begin(rdev->ddev, target_seq[ring]);
  434. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  435. if (target_seq[i]) {
  436. radeon_irq_kms_sw_irq_get(rdev, i);
  437. }
  438. }
  439. if (intr) {
  440. r = wait_event_interruptible_timeout(rdev->fence_queue,
  441. (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
  442. timeout);
  443. } else {
  444. r = wait_event_timeout(rdev->fence_queue,
  445. (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
  446. timeout);
  447. }
  448. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  449. if (target_seq[i]) {
  450. radeon_irq_kms_sw_irq_put(rdev, i);
  451. }
  452. }
  453. if (unlikely(r < 0)) {
  454. return r;
  455. }
  456. trace_radeon_fence_wait_end(rdev->ddev, target_seq[ring]);
  457. if (unlikely(!signaled)) {
  458. /* we were interrupted for some reason and fence
  459. * isn't signaled yet, resume waiting */
  460. if (r) {
  461. continue;
  462. }
  463. mutex_lock(&rdev->ring_lock);
  464. for (i = 0, tmp = 0; i < RADEON_NUM_RINGS; ++i) {
  465. if (time_after(rdev->fence_drv[i].last_activity, tmp)) {
  466. tmp = rdev->fence_drv[i].last_activity;
  467. }
  468. }
  469. /* test if somebody else has already decided that this is a lockup */
  470. if (last_activity != tmp) {
  471. last_activity = tmp;
  472. mutex_unlock(&rdev->ring_lock);
  473. continue;
  474. }
  475. if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
  476. /* good news we believe it's a lockup */
  477. dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx)\n",
  478. target_seq[ring]);
  479. /* change last activity so nobody else think there is a lockup */
  480. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  481. rdev->fence_drv[i].last_activity = jiffies;
  482. }
  483. /* mark the ring as not ready any more */
  484. rdev->ring[ring].ready = false;
  485. mutex_unlock(&rdev->ring_lock);
  486. return -EDEADLK;
  487. }
  488. mutex_unlock(&rdev->ring_lock);
  489. }
  490. }
  491. return 0;
  492. }
  493. /**
  494. * radeon_fence_wait_any - wait for a fence to signal on any ring
  495. *
  496. * @rdev: radeon device pointer
  497. * @fences: radeon fence object(s)
  498. * @intr: use interruptable sleep
  499. *
  500. * Wait for any requested fence to signal (all asics). Fence
  501. * array is indexed by ring id. @intr selects whether to use
  502. * interruptable (true) or non-interruptable (false) sleep when
  503. * waiting for the fences. Used by the suballocator.
  504. * Returns 0 if any fence has passed, error for all other cases.
  505. */
  506. int radeon_fence_wait_any(struct radeon_device *rdev,
  507. struct radeon_fence **fences,
  508. bool intr)
  509. {
  510. uint64_t seq[RADEON_NUM_RINGS];
  511. unsigned i;
  512. int r;
  513. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  514. seq[i] = 0;
  515. if (!fences[i]) {
  516. continue;
  517. }
  518. if (fences[i]->seq == RADEON_FENCE_SIGNALED_SEQ) {
  519. /* something was allready signaled */
  520. return 0;
  521. }
  522. seq[i] = fences[i]->seq;
  523. }
  524. r = radeon_fence_wait_any_seq(rdev, seq, intr);
  525. if (r) {
  526. return r;
  527. }
  528. return 0;
  529. }
  530. /**
  531. * radeon_fence_wait_next_locked - wait for the next fence to signal
  532. *
  533. * @rdev: radeon device pointer
  534. * @ring: ring index the fence is associated with
  535. *
  536. * Wait for the next fence on the requested ring to signal (all asics).
  537. * Returns 0 if the next fence has passed, error for all other cases.
  538. * Caller must hold ring lock.
  539. */
  540. int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
  541. {
  542. uint64_t seq;
  543. seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
  544. if (seq >= rdev->fence_drv[ring].sync_seq[ring]) {
  545. /* nothing to wait for, last_seq is
  546. already the last emited fence */
  547. return -ENOENT;
  548. }
  549. return radeon_fence_wait_seq(rdev, seq, ring, false, false);
  550. }
  551. /**
  552. * radeon_fence_wait_empty_locked - wait for all fences to signal
  553. *
  554. * @rdev: radeon device pointer
  555. * @ring: ring index the fence is associated with
  556. *
  557. * Wait for all fences on the requested ring to signal (all asics).
  558. * Returns 0 if the fences have passed, error for all other cases.
  559. * Caller must hold ring lock.
  560. */
  561. int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
  562. {
  563. uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
  564. int r;
  565. r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
  566. if (r) {
  567. if (r == -EDEADLK) {
  568. return -EDEADLK;
  569. }
  570. dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
  571. ring, r);
  572. }
  573. return 0;
  574. }
  575. /**
  576. * radeon_fence_ref - take a ref on a fence
  577. *
  578. * @fence: radeon fence object
  579. *
  580. * Take a reference on a fence (all asics).
  581. * Returns the fence.
  582. */
  583. struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
  584. {
  585. kref_get(&fence->kref);
  586. return fence;
  587. }
  588. /**
  589. * radeon_fence_unref - remove a ref on a fence
  590. *
  591. * @fence: radeon fence object
  592. *
  593. * Remove a reference on a fence (all asics).
  594. */
  595. void radeon_fence_unref(struct radeon_fence **fence)
  596. {
  597. struct radeon_fence *tmp = *fence;
  598. *fence = NULL;
  599. if (tmp) {
  600. kref_put(&tmp->kref, radeon_fence_destroy);
  601. }
  602. }
  603. /**
  604. * radeon_fence_count_emitted - get the count of emitted fences
  605. *
  606. * @rdev: radeon device pointer
  607. * @ring: ring index the fence is associated with
  608. *
  609. * Get the number of fences emitted on the requested ring (all asics).
  610. * Returns the number of emitted fences on the ring. Used by the
  611. * dynpm code to ring track activity.
  612. */
  613. unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
  614. {
  615. uint64_t emitted;
  616. /* We are not protected by ring lock when reading the last sequence
  617. * but it's ok to report slightly wrong fence count here.
  618. */
  619. radeon_fence_process(rdev, ring);
  620. emitted = rdev->fence_drv[ring].sync_seq[ring]
  621. - atomic64_read(&rdev->fence_drv[ring].last_seq);
  622. /* to avoid 32bits warp around */
  623. if (emitted > 0x10000000) {
  624. emitted = 0x10000000;
  625. }
  626. return (unsigned)emitted;
  627. }
  628. /**
  629. * radeon_fence_need_sync - do we need a semaphore
  630. *
  631. * @fence: radeon fence object
  632. * @dst_ring: which ring to check against
  633. *
  634. * Check if the fence needs to be synced against another ring
  635. * (all asics). If so, we need to emit a semaphore.
  636. * Returns true if we need to sync with another ring, false if
  637. * not.
  638. */
  639. bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
  640. {
  641. struct radeon_fence_driver *fdrv;
  642. if (!fence) {
  643. return false;
  644. }
  645. if (fence->ring == dst_ring) {
  646. return false;
  647. }
  648. /* we are protected by the ring mutex */
  649. fdrv = &fence->rdev->fence_drv[dst_ring];
  650. if (fence->seq <= fdrv->sync_seq[fence->ring]) {
  651. return false;
  652. }
  653. return true;
  654. }
  655. /**
  656. * radeon_fence_note_sync - record the sync point
  657. *
  658. * @fence: radeon fence object
  659. * @dst_ring: which ring to check against
  660. *
  661. * Note the sequence number at which point the fence will
  662. * be synced with the requested ring (all asics).
  663. */
  664. void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
  665. {
  666. struct radeon_fence_driver *dst, *src;
  667. unsigned i;
  668. if (!fence) {
  669. return;
  670. }
  671. if (fence->ring == dst_ring) {
  672. return;
  673. }
  674. /* we are protected by the ring mutex */
  675. src = &fence->rdev->fence_drv[fence->ring];
  676. dst = &fence->rdev->fence_drv[dst_ring];
  677. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  678. if (i == dst_ring) {
  679. continue;
  680. }
  681. dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
  682. }
  683. }
  684. /**
  685. * radeon_fence_driver_start_ring - make the fence driver
  686. * ready for use on the requested ring.
  687. *
  688. * @rdev: radeon device pointer
  689. * @ring: ring index to start the fence driver on
  690. *
  691. * Make the fence driver ready for processing (all asics).
  692. * Not all asics have all rings, so each asic will only
  693. * start the fence driver on the rings it has.
  694. * Returns 0 for success, errors for failure.
  695. */
  696. int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
  697. {
  698. uint64_t index;
  699. int r;
  700. radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
  701. if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
  702. rdev->fence_drv[ring].scratch_reg = 0;
  703. index = R600_WB_EVENT_OFFSET + ring * 4;
  704. } else {
  705. r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
  706. if (r) {
  707. dev_err(rdev->dev, "fence failed to get scratch register\n");
  708. return r;
  709. }
  710. index = RADEON_WB_SCRATCH_OFFSET +
  711. rdev->fence_drv[ring].scratch_reg -
  712. rdev->scratch.reg_base;
  713. }
  714. rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
  715. rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
  716. radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
  717. rdev->fence_drv[ring].initialized = true;
  718. dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
  719. ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
  720. return 0;
  721. }
  722. /**
  723. * radeon_fence_driver_init_ring - init the fence driver
  724. * for the requested ring.
  725. *
  726. * @rdev: radeon device pointer
  727. * @ring: ring index to start the fence driver on
  728. *
  729. * Init the fence driver for the requested ring (all asics).
  730. * Helper function for radeon_fence_driver_init().
  731. */
  732. static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
  733. {
  734. int i;
  735. rdev->fence_drv[ring].scratch_reg = -1;
  736. rdev->fence_drv[ring].cpu_addr = NULL;
  737. rdev->fence_drv[ring].gpu_addr = 0;
  738. for (i = 0; i < RADEON_NUM_RINGS; ++i)
  739. rdev->fence_drv[ring].sync_seq[i] = 0;
  740. atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
  741. rdev->fence_drv[ring].last_activity = jiffies;
  742. rdev->fence_drv[ring].initialized = false;
  743. }
  744. /**
  745. * radeon_fence_driver_init - init the fence driver
  746. * for all possible rings.
  747. *
  748. * @rdev: radeon device pointer
  749. *
  750. * Init the fence driver for all possible rings (all asics).
  751. * Not all asics have all rings, so each asic will only
  752. * start the fence driver on the rings it has using
  753. * radeon_fence_driver_start_ring().
  754. * Returns 0 for success.
  755. */
  756. int radeon_fence_driver_init(struct radeon_device *rdev)
  757. {
  758. int ring;
  759. init_waitqueue_head(&rdev->fence_queue);
  760. for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  761. radeon_fence_driver_init_ring(rdev, ring);
  762. }
  763. if (radeon_debugfs_fence_init(rdev)) {
  764. dev_err(rdev->dev, "fence debugfs file creation failed\n");
  765. }
  766. return 0;
  767. }
  768. /**
  769. * radeon_fence_driver_fini - tear down the fence driver
  770. * for all possible rings.
  771. *
  772. * @rdev: radeon device pointer
  773. *
  774. * Tear down the fence driver for all possible rings (all asics).
  775. */
  776. void radeon_fence_driver_fini(struct radeon_device *rdev)
  777. {
  778. int ring, r;
  779. mutex_lock(&rdev->ring_lock);
  780. for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  781. if (!rdev->fence_drv[ring].initialized)
  782. continue;
  783. r = radeon_fence_wait_empty_locked(rdev, ring);
  784. if (r) {
  785. /* no need to trigger GPU reset as we are unloading */
  786. radeon_fence_driver_force_completion(rdev);
  787. }
  788. wake_up_all(&rdev->fence_queue);
  789. radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
  790. rdev->fence_drv[ring].initialized = false;
  791. }
  792. mutex_unlock(&rdev->ring_lock);
  793. }
  794. /**
  795. * radeon_fence_driver_force_completion - force all fence waiter to complete
  796. *
  797. * @rdev: radeon device pointer
  798. *
  799. * In case of GPU reset failure make sure no process keep waiting on fence
  800. * that will never complete.
  801. */
  802. void radeon_fence_driver_force_completion(struct radeon_device *rdev)
  803. {
  804. int ring;
  805. for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  806. if (!rdev->fence_drv[ring].initialized)
  807. continue;
  808. radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
  809. }
  810. }
  811. /*
  812. * Fence debugfs
  813. */
  814. #if defined(CONFIG_DEBUG_FS)
  815. static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
  816. {
  817. struct drm_info_node *node = (struct drm_info_node *)m->private;
  818. struct drm_device *dev = node->minor->dev;
  819. struct radeon_device *rdev = dev->dev_private;
  820. int i, j;
  821. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  822. if (!rdev->fence_drv[i].initialized)
  823. continue;
  824. seq_printf(m, "--- ring %d ---\n", i);
  825. seq_printf(m, "Last signaled fence 0x%016llx\n",
  826. (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
  827. seq_printf(m, "Last emitted 0x%016llx\n",
  828. rdev->fence_drv[i].sync_seq[i]);
  829. for (j = 0; j < RADEON_NUM_RINGS; ++j) {
  830. if (i != j && rdev->fence_drv[j].initialized)
  831. seq_printf(m, "Last sync to ring %d 0x%016llx\n",
  832. j, rdev->fence_drv[i].sync_seq[j]);
  833. }
  834. }
  835. return 0;
  836. }
  837. static struct drm_info_list radeon_debugfs_fence_list[] = {
  838. {"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
  839. };
  840. #endif
  /*
   * Registers the fence debugfs file(s) for @rdev.
   *
   * Returns the result of radeon_debugfs_add_files() when CONFIG_DEBUG_FS
   * is enabled, and 0 otherwise.
   */
  int radeon_debugfs_fence_init(struct radeon_device *rdev)
  {
  #if defined(CONFIG_DEBUG_FS)
      /* derive the entry count from the table so the two cannot drift apart */
      return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list,
                                      ARRAY_SIZE(radeon_debugfs_fence_list));
  #else
      return 0;
  #endif
  }