radeon_fence.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915
  1. /*
  2. * Copyright 2009 Jerome Glisse.
  3. * All Rights Reserved.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the
  7. * "Software"), to deal in the Software without restriction, including
  8. * without limitation the rights to use, copy, modify, merge, publish,
  9. * distribute, sub license, and/or sell copies of the Software, and to
  10. * permit persons to whom the Software is furnished to do so, subject to
  11. * the following conditions:
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16. * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17. * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18. * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19. * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20. *
  21. * The above copyright notice and this permission notice (including the
  22. * next paragraph) shall be included in all copies or substantial portions
  23. * of the Software.
  24. *
  25. */
  26. /*
  27. * Authors:
  28. * Jerome Glisse <glisse@freedesktop.org>
  29. * Dave Airlie
  30. */
  31. #include <linux/seq_file.h>
  32. #include <linux/atomic.h>
  33. #include <linux/wait.h>
  34. #include <linux/list.h>
  35. #include <linux/kref.h>
  36. #include <linux/slab.h>
  37. #include "drmP.h"
  38. #include "drm.h"
  39. #include "radeon_reg.h"
  40. #include "radeon.h"
  41. #include "radeon_trace.h"
  42. /*
  43. * Fences
  44. * Fences mark an event in the GPUs pipeline and are used
  45. * for GPU/CPU synchronization. When the fence is written,
  46. * it is expected that all buffers associated with that fence
  47. * are no longer in use by the associated ring on the GPU and
  48. * that the relevant GPU caches have been flushed. Whether
  49. * we use a scratch register or memory location depends on the asic
  50. * and whether writeback is enabled.
  51. */
  52. /**
  53. * radeon_fence_write - write a fence value
  54. *
  55. * @rdev: radeon_device pointer
  56. * @seq: sequence number to write
  57. * @ring: ring index the fence is associated with
  58. *
  59. * Writes a fence value to memory or a scratch register (all asics).
  60. */
  61. static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
  62. {
  63. struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  64. if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  65. *drv->cpu_addr = cpu_to_le32(seq);
  66. } else {
  67. WREG32(drv->scratch_reg, seq);
  68. }
  69. }
  70. /**
  71. * radeon_fence_read - read a fence value
  72. *
  73. * @rdev: radeon_device pointer
  74. * @ring: ring index the fence is associated with
  75. *
  76. * Reads a fence value from memory or a scratch register (all asics).
  77. * Returns the value of the fence read from memory or register.
  78. */
  79. static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
  80. {
  81. struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  82. u32 seq = 0;
  83. if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  84. seq = le32_to_cpu(*drv->cpu_addr);
  85. } else {
  86. seq = RREG32(drv->scratch_reg);
  87. }
  88. return seq;
  89. }
  90. /**
  91. * radeon_fence_emit - emit a fence on the requested ring
  92. *
  93. * @rdev: radeon_device pointer
  94. * @fence: radeon fence object
  95. * @ring: ring index the fence is associated with
  96. *
  97. * Emits a fence command on the requested ring (all asics).
  98. * Returns 0 on success, -ENOMEM on failure.
  99. */
  100. int radeon_fence_emit(struct radeon_device *rdev,
  101. struct radeon_fence **fence,
  102. int ring)
  103. {
  104. /* we are protected by the ring emission mutex */
  105. *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
  106. if ((*fence) == NULL) {
  107. return -ENOMEM;
  108. }
  109. kref_init(&((*fence)->kref));
  110. (*fence)->rdev = rdev;
  111. (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
  112. (*fence)->ring = ring;
  113. radeon_fence_ring_emit(rdev, ring, *fence);
  114. trace_radeon_fence_emit(rdev->ddev, (*fence)->seq);
  115. return 0;
  116. }
  117. /**
  118. * radeon_fence_process - process a fence
  119. *
  120. * @rdev: radeon_device pointer
  121. * @ring: ring index the fence is associated with
  122. *
  123. * Checks the current fence value and wakes the fence queue
  124. * if the sequence number has increased (all asics).
  125. */
  126. void radeon_fence_process(struct radeon_device *rdev, int ring)
  127. {
  128. uint64_t seq, last_seq, last_emitted;
  129. unsigned count_loop = 0;
  130. bool wake = false;
  131. /* Note there is a scenario here for an infinite loop but it's
  132. * very unlikely to happen. For it to happen, the current polling
  133. * process need to be interrupted by another process and another
  134. * process needs to update the last_seq btw the atomic read and
  135. * xchg of the current process.
  136. *
  137. * More over for this to go in infinite loop there need to be
  138. * continuously new fence signaled ie radeon_fence_read needs
  139. * to return a different value each time for both the currently
  140. * polling process and the other process that xchg the last_seq
  141. * btw atomic read and xchg of the current process. And the
  142. * value the other process set as last seq must be higher than
  143. * the seq value we just read. Which means that current process
  144. * need to be interrupted after radeon_fence_read and before
  145. * atomic xchg.
  146. *
  147. * To be even more safe we count the number of time we loop and
  148. * we bail after 10 loop just accepting the fact that we might
  149. * have temporarly set the last_seq not to the true real last
  150. * seq but to an older one.
  151. */
  152. last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
  153. do {
  154. last_emitted = rdev->fence_drv[ring].sync_seq[ring];
  155. seq = radeon_fence_read(rdev, ring);
  156. seq |= last_seq & 0xffffffff00000000LL;
  157. if (seq < last_seq) {
  158. seq &= 0xffffffff;
  159. seq |= last_emitted & 0xffffffff00000000LL;
  160. }
  161. if (seq <= last_seq || seq > last_emitted) {
  162. break;
  163. }
  164. /* If we loop over we don't want to return without
  165. * checking if a fence is signaled as it means that the
  166. * seq we just read is different from the previous on.
  167. */
  168. wake = true;
  169. last_seq = seq;
  170. if ((count_loop++) > 10) {
  171. /* We looped over too many time leave with the
  172. * fact that we might have set an older fence
  173. * seq then the current real last seq as signaled
  174. * by the hw.
  175. */
  176. break;
  177. }
  178. } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
  179. if (wake) {
  180. rdev->fence_drv[ring].last_activity = jiffies;
  181. wake_up_all(&rdev->fence_queue);
  182. }
  183. }
  184. /**
  185. * radeon_fence_destroy - destroy a fence
  186. *
  187. * @kref: fence kref
  188. *
  189. * Frees the fence object (all asics).
  190. */
  191. static void radeon_fence_destroy(struct kref *kref)
  192. {
  193. struct radeon_fence *fence;
  194. fence = container_of(kref, struct radeon_fence, kref);
  195. kfree(fence);
  196. }
  197. /**
  198. * radeon_fence_seq_signaled - check if a fence sequeuce number has signaled
  199. *
  200. * @rdev: radeon device pointer
  201. * @seq: sequence number
  202. * @ring: ring index the fence is associated with
  203. *
  204. * Check if the last singled fence sequnce number is >= the requested
  205. * sequence number (all asics).
  206. * Returns true if the fence has signaled (current fence value
  207. * is >= requested value) or false if it has not (current fence
  208. * value is < the requested value. Helper function for
  209. * radeon_fence_signaled().
  210. */
  211. static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
  212. u64 seq, unsigned ring)
  213. {
  214. if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  215. return true;
  216. }
  217. /* poll new last sequence at least once */
  218. radeon_fence_process(rdev, ring);
  219. if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  220. return true;
  221. }
  222. return false;
  223. }
  224. /**
  225. * radeon_fence_signaled - check if a fence has signaled
  226. *
  227. * @fence: radeon fence object
  228. *
  229. * Check if the requested fence has signaled (all asics).
  230. * Returns true if the fence has signaled or false if it has not.
  231. */
  232. bool radeon_fence_signaled(struct radeon_fence *fence)
  233. {
  234. if (!fence) {
  235. return true;
  236. }
  237. if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
  238. return true;
  239. }
  240. if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
  241. fence->seq = RADEON_FENCE_SIGNALED_SEQ;
  242. return true;
  243. }
  244. return false;
  245. }
  246. /**
  247. * radeon_fence_wait_seq - wait for a specific sequence number
  248. *
  249. * @rdev: radeon device pointer
  250. * @target_seq: sequence number we want to wait for
  251. * @ring: ring index the fence is associated with
  252. * @intr: use interruptible sleep
  253. * @lock_ring: whether this function takes rdev->ring_lock itself
  254. *
  255. * Wait for the requested sequence number to be written (all asics).
  256. * @intr selects whether to use interruptible (true) or non-interruptible
  257. * (false) sleep when waiting for the sequence number. Helper function
  258. * for radeon_fence_wait(), et al.
  259. * Returns 0 if the sequence number has passed, error for all other cases.
  260. * -EDEADLK is returned when a GPU lockup has been detected and the ring is
  261. * marked as not ready so no further jobs get scheduled until a successful
  262. * reset.
  * -EBUSY is returned when the ring is not marked ready while the sequence
  * number is still pending. A negative result from the wait primitive is
  * returned to the caller unchanged.
  *
  * With @lock_ring false, rdev->ring_lock is never taken or released here;
  * the *_locked callers in this file document that the caller must already
  * hold the ring lock.
  263. */
  264. static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
  265. unsigned ring, bool intr, bool lock_ring)
  266. {
  267. unsigned long timeout, last_activity;
  268. uint64_t seq;
  269. unsigned i;
  270. bool signaled;
  271. int r;
  /* keep going until the last signaled sequence number reaches target_seq */
  272. while (target_seq > atomic64_read(&rdev->fence_drv[ring].last_seq)) {
  273. if (!rdev->ring[ring].ready) {
  274. return -EBUSY;
  275. }
  /* sleep for what is left of the timeout window since the last activity */
  276. timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
  277. if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
  278. /* the normal case, timeout is somewhere before last_activity */
  279. timeout = rdev->fence_drv[ring].last_activity - timeout;
  280. } else {
  281. /* either jiffies wrapped around, or no fence was signaled in the last 500ms
  282. * anyway we will just wait for the minimum amount and then check for a lockup
  283. */
  284. timeout = 1;
  285. }
  /* snapshot of last_seq, compared after the sleep to detect progress */
  286. seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
  287. /* Save current last activity value, used to check for GPU lockups */
  288. last_activity = rdev->fence_drv[ring].last_activity;
  289. trace_radeon_fence_wait_begin(rdev->ddev, seq);
  /* the sw irq reference is held only for the duration of the sleep */
  290. radeon_irq_kms_sw_irq_get(rdev, ring);
  291. if (intr) {
  292. r = wait_event_interruptible_timeout(rdev->fence_queue,
  293. (signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
  294. timeout);
  295. } else {
  296. r = wait_event_timeout(rdev->fence_queue,
  297. (signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
  298. timeout);
  299. }
  300. radeon_irq_kms_sw_irq_put(rdev, ring);
  /* a negative result is handed straight back to the caller */
  301. if (unlikely(r < 0)) {
  302. return r;
  303. }
  304. trace_radeon_fence_wait_end(rdev->ddev, seq);
  305. if (unlikely(!signaled)) {
  306. /* non-zero r: the wait ended before the timeout expired and the
  307. * fence isn't signaled yet, resume waiting */
  308. if (r) {
  309. continue;
  310. }
  311. /* check if sequence value has changed since last_activity */
  312. if (seq != atomic64_read(&rdev->fence_drv[ring].last_seq)) {
  313. continue;
  314. }
  315. if (lock_ring) {
  316. mutex_lock(&rdev->ring_lock);
  317. }
  318. /* test if somebody else has already decided that this is a lockup */
  319. if (last_activity != rdev->fence_drv[ring].last_activity) {
  320. if (lock_ring) {
  321. mutex_unlock(&rdev->ring_lock);
  322. }
  323. continue;
  324. }
  325. if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
  326. /* good news we believe it's a lockup */
  327. dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
  328. target_seq, seq);
  329. /* change last activity on every ring so nobody else thinks there is a lockup */
  330. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  331. rdev->fence_drv[i].last_activity = jiffies;
  332. }
  333. /* mark the ring as not ready any more */
  334. rdev->ring[ring].ready = false;
  335. if (lock_ring) {
  336. mutex_unlock(&rdev->ring_lock);
  337. }
  338. return -EDEADLK;
  339. }
  /* no lockup detected: drop the lock and go around again */
  340. if (lock_ring) {
  341. mutex_unlock(&rdev->ring_lock);
  342. }
  343. }
  344. }
  345. return 0;
  346. }
  347. /**
  348. * radeon_fence_wait - wait for a fence to signal
  349. *
  350. * @fence: radeon fence object
  351. * @intr: use interruptable sleep
  352. *
  353. * Wait for the requested fence to signal (all asics).
  354. * @intr selects whether to use interruptable (true) or non-interruptable
  355. * (false) sleep when waiting for the fence.
  356. * Returns 0 if the fence has passed, error for all other cases.
  357. */
  358. int radeon_fence_wait(struct radeon_fence *fence, bool intr)
  359. {
  360. int r;
  361. if (fence == NULL) {
  362. WARN(1, "Querying an invalid fence : %p !\n", fence);
  363. return -EINVAL;
  364. }
  365. r = radeon_fence_wait_seq(fence->rdev, fence->seq,
  366. fence->ring, intr, true);
  367. if (r) {
  368. return r;
  369. }
  370. fence->seq = RADEON_FENCE_SIGNALED_SEQ;
  371. return 0;
  372. }
  373. bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
  374. {
  375. unsigned i;
  376. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  377. if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i)) {
  378. return true;
  379. }
  380. }
  381. return false;
  382. }
  383. /**
  384. * radeon_fence_wait_any_seq - wait for a sequence number on any ring
  385. *
  386. * @rdev: radeon device pointer
  387. * @target_seq: sequence number(s) we want to wait for
  388. * @intr: use interruptible sleep
  389. *
  390. * Wait for the requested sequence number(s) to be written by any ring
  391. * (all asics). Sequence number array is indexed by ring id; a zero entry
  * means that ring is not waited on.
  392. * @intr selects whether to use interruptible (true) or non-interruptible
  393. * (false) sleep when waiting for the sequence number. Helper function
  394. * for radeon_fence_wait_any(), et al.
  395. * Returns 0 if the sequence number has passed, error for all other cases.
  * -ENOENT is returned when every entry of @target_seq is zero.
  * -EDEADLK is returned when a lockup is detected on the lowest-numbered
  * ring being waited on; that ring is then marked as not ready.
  * A negative result from the wait primitive is returned unchanged.
  * Unlike radeon_fence_wait_seq(), this function always takes
  * rdev->ring_lock itself around the lockup check.
  396. */
  397. static int radeon_fence_wait_any_seq(struct radeon_device *rdev,
  398. u64 *target_seq, bool intr)
  399. {
  400. unsigned long timeout, last_activity, tmp;
  401. unsigned i, ring = RADEON_NUM_RINGS;
  402. bool signaled;
  403. int r;
  /* scan the rings we wait on: newest activity time and lowest ring index */
  404. for (i = 0, last_activity = 0; i < RADEON_NUM_RINGS; ++i) {
  405. if (!target_seq[i]) {
  406. continue;
  407. }
  408. /* use the most recent one as indicator */
  409. if (time_after(rdev->fence_drv[i].last_activity, last_activity)) {
  410. last_activity = rdev->fence_drv[i].last_activity;
  411. }
  412. /* For lockup detection just pick the lowest ring we are
  413. * actively waiting for
  414. */
  415. if (i < ring) {
  416. ring = i;
  417. }
  418. }
  419. /* nothing to wait for ? */
  420. if (ring == RADEON_NUM_RINGS) {
  421. return -ENOENT;
  422. }
  423. while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
  /* sleep for what is left of the timeout window since the last activity */
  424. timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
  425. if (time_after(last_activity, timeout)) {
  426. /* the normal case, timeout is somewhere before last_activity */
  427. timeout = last_activity - timeout;
  428. } else {
  429. /* either jiffies wrapped around, or no fence was signaled in the last 500ms
  430. * anyway we will just wait for the minimum amount and then check for a lockup
  431. */
  432. timeout = 1;
  433. }
  434. trace_radeon_fence_wait_begin(rdev->ddev, target_seq[ring]);
  /* take a sw irq reference on every ring we are waiting on */
  435. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  436. if (target_seq[i]) {
  437. radeon_irq_kms_sw_irq_get(rdev, i);
  438. }
  439. }
  440. if (intr) {
  441. r = wait_event_interruptible_timeout(rdev->fence_queue,
  442. (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
  443. timeout);
  444. } else {
  445. r = wait_event_timeout(rdev->fence_queue,
  446. (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
  447. timeout);
  448. }
  /* drop the sw irq references taken before the sleep */
  449. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  450. if (target_seq[i]) {
  451. radeon_irq_kms_sw_irq_put(rdev, i);
  452. }
  453. }
  /* a negative result is handed straight back to the caller */
  454. if (unlikely(r < 0)) {
  455. return r;
  456. }
  457. trace_radeon_fence_wait_end(rdev->ddev, target_seq[ring]);
  458. if (unlikely(!signaled)) {
  459. /* non-zero r: the wait ended before the timeout expired and no
  460. * fence is signaled yet, resume waiting */
  461. if (r) {
  462. continue;
  463. }
  464. mutex_lock(&rdev->ring_lock);
  /* newest activity time, this time taken over all rings */
  465. for (i = 0, tmp = 0; i < RADEON_NUM_RINGS; ++i) {
  466. if (time_after(rdev->fence_drv[i].last_activity, tmp)) {
  467. tmp = rdev->fence_drv[i].last_activity;
  468. }
  469. }
  470. /* test if somebody else has already decided that this is a lockup */
  471. if (last_activity != tmp) {
  472. last_activity = tmp;
  473. mutex_unlock(&rdev->ring_lock);
  474. continue;
  475. }
  476. if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
  477. /* good news we believe it's a lockup */
  478. dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx)\n",
  479. target_seq[ring]);
  480. /* change last activity on every ring so nobody else thinks there is a lockup */
  481. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  482. rdev->fence_drv[i].last_activity = jiffies;
  483. }
  484. /* mark the ring as not ready any more */
  485. rdev->ring[ring].ready = false;
  486. mutex_unlock(&rdev->ring_lock);
  487. return -EDEADLK;
  488. }
  /* no lockup detected: drop the lock and go around again */
  489. mutex_unlock(&rdev->ring_lock);
  490. }
  491. }
  492. return 0;
  493. }
  494. /**
  495. * radeon_fence_wait_any - wait for a fence to signal on any ring
  496. *
  497. * @rdev: radeon device pointer
  498. * @fences: radeon fence object(s)
  499. * @intr: use interruptable sleep
  500. *
  501. * Wait for any requested fence to signal (all asics). Fence
  502. * array is indexed by ring id. @intr selects whether to use
  503. * interruptable (true) or non-interruptable (false) sleep when
  504. * waiting for the fences. Used by the suballocator.
  505. * Returns 0 if any fence has passed, error for all other cases.
  506. */
  507. int radeon_fence_wait_any(struct radeon_device *rdev,
  508. struct radeon_fence **fences,
  509. bool intr)
  510. {
  511. uint64_t seq[RADEON_NUM_RINGS];
  512. unsigned i;
  513. int r;
  514. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  515. seq[i] = 0;
  516. if (!fences[i]) {
  517. continue;
  518. }
  519. if (fences[i]->seq == RADEON_FENCE_SIGNALED_SEQ) {
  520. /* something was allready signaled */
  521. return 0;
  522. }
  523. seq[i] = fences[i]->seq;
  524. }
  525. r = radeon_fence_wait_any_seq(rdev, seq, intr);
  526. if (r) {
  527. return r;
  528. }
  529. return 0;
  530. }
  531. /**
  532. * radeon_fence_wait_next_locked - wait for the next fence to signal
  533. *
  534. * @rdev: radeon device pointer
  535. * @ring: ring index the fence is associated with
  536. *
  537. * Wait for the next fence on the requested ring to signal (all asics).
  538. * Returns 0 if the next fence has passed, error for all other cases.
  539. * Caller must hold ring lock.
  540. */
  541. int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
  542. {
  543. uint64_t seq;
  544. seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
  545. if (seq >= rdev->fence_drv[ring].sync_seq[ring]) {
  546. /* nothing to wait for, last_seq is
  547. already the last emited fence */
  548. return -ENOENT;
  549. }
  550. return radeon_fence_wait_seq(rdev, seq, ring, false, false);
  551. }
  552. /**
  553. * radeon_fence_wait_empty_locked - wait for all fences to signal
  554. *
  555. * @rdev: radeon device pointer
  556. * @ring: ring index the fence is associated with
  557. *
  558. * Wait for all fences on the requested ring to signal (all asics).
  559. * Returns 0 if the fences have passed, error for all other cases.
  560. * Caller must hold ring lock.
  561. */
  562. void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
  563. {
  564. uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
  565. while(1) {
  566. int r;
  567. r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
  568. if (r == -EDEADLK) {
  569. mutex_unlock(&rdev->ring_lock);
  570. r = radeon_gpu_reset(rdev);
  571. mutex_lock(&rdev->ring_lock);
  572. if (!r)
  573. continue;
  574. }
  575. if (r) {
  576. dev_err(rdev->dev, "error waiting for ring to become"
  577. " idle (%d)\n", r);
  578. }
  579. return;
  580. }
  581. }
  582. /**
  583. * radeon_fence_ref - take a ref on a fence
  584. *
  585. * @fence: radeon fence object
  586. *
  587. * Take a reference on a fence (all asics).
  588. * Returns the fence.
  589. */
  590. struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
  591. {
  592. kref_get(&fence->kref);
  593. return fence;
  594. }
  595. /**
  596. * radeon_fence_unref - remove a ref on a fence
  597. *
  598. * @fence: radeon fence object
  599. *
  600. * Remove a reference on a fence (all asics).
  601. */
  602. void radeon_fence_unref(struct radeon_fence **fence)
  603. {
  604. struct radeon_fence *tmp = *fence;
  605. *fence = NULL;
  606. if (tmp) {
  607. kref_put(&tmp->kref, radeon_fence_destroy);
  608. }
  609. }
  610. /**
  611. * radeon_fence_count_emitted - get the count of emitted fences
  612. *
  613. * @rdev: radeon device pointer
  614. * @ring: ring index the fence is associated with
  615. *
  616. * Get the number of fences emitted on the requested ring (all asics).
  617. * Returns the number of emitted fences on the ring. Used by the
  618. * dynpm code to ring track activity.
  619. */
  620. unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
  621. {
  622. uint64_t emitted;
  623. /* We are not protected by ring lock when reading the last sequence
  624. * but it's ok to report slightly wrong fence count here.
  625. */
  626. radeon_fence_process(rdev, ring);
  627. emitted = rdev->fence_drv[ring].sync_seq[ring]
  628. - atomic64_read(&rdev->fence_drv[ring].last_seq);
  629. /* to avoid 32bits warp around */
  630. if (emitted > 0x10000000) {
  631. emitted = 0x10000000;
  632. }
  633. return (unsigned)emitted;
  634. }
  635. /**
  636. * radeon_fence_need_sync - do we need a semaphore
  637. *
  638. * @fence: radeon fence object
  639. * @dst_ring: which ring to check against
  640. *
  641. * Check if the fence needs to be synced against another ring
  642. * (all asics). If so, we need to emit a semaphore.
  643. * Returns true if we need to sync with another ring, false if
  644. * not.
  645. */
  646. bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
  647. {
  648. struct radeon_fence_driver *fdrv;
  649. if (!fence) {
  650. return false;
  651. }
  652. if (fence->ring == dst_ring) {
  653. return false;
  654. }
  655. /* we are protected by the ring mutex */
  656. fdrv = &fence->rdev->fence_drv[dst_ring];
  657. if (fence->seq <= fdrv->sync_seq[fence->ring]) {
  658. return false;
  659. }
  660. return true;
  661. }
  662. /**
  663. * radeon_fence_note_sync - record the sync point
  664. *
  665. * @fence: radeon fence object
  666. * @dst_ring: which ring to check against
  667. *
  668. * Note the sequence number at which point the fence will
  669. * be synced with the requested ring (all asics).
  670. */
  671. void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
  672. {
  673. struct radeon_fence_driver *dst, *src;
  674. unsigned i;
  675. if (!fence) {
  676. return;
  677. }
  678. if (fence->ring == dst_ring) {
  679. return;
  680. }
  681. /* we are protected by the ring mutex */
  682. src = &fence->rdev->fence_drv[fence->ring];
  683. dst = &fence->rdev->fence_drv[dst_ring];
  684. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  685. if (i == dst_ring) {
  686. continue;
  687. }
  688. dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
  689. }
  690. }
  691. /**
  692. * radeon_fence_driver_start_ring - make the fence driver
  693. * ready for use on the requested ring.
  694. *
  695. * @rdev: radeon device pointer
  696. * @ring: ring index to start the fence driver on
  697. *
  698. * Make the fence driver ready for processing (all asics).
  699. * Not all asics have all rings, so each asic will only
  700. * start the fence driver on the rings it has.
  701. * Returns 0 for success, errors for failure.
  702. */
  703. int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
  704. {
  705. uint64_t index;
  706. int r;
  707. radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
  708. if (rdev->wb.use_event) {
  709. rdev->fence_drv[ring].scratch_reg = 0;
  710. index = R600_WB_EVENT_OFFSET + ring * 4;
  711. } else {
  712. r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
  713. if (r) {
  714. dev_err(rdev->dev, "fence failed to get scratch register\n");
  715. return r;
  716. }
  717. index = RADEON_WB_SCRATCH_OFFSET +
  718. rdev->fence_drv[ring].scratch_reg -
  719. rdev->scratch.reg_base;
  720. }
  721. rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
  722. rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
  723. radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
  724. rdev->fence_drv[ring].initialized = true;
  725. dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
  726. ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
  727. return 0;
  728. }
  729. /**
  730. * radeon_fence_driver_init_ring - init the fence driver
  731. * for the requested ring.
  732. *
  733. * @rdev: radeon device pointer
  734. * @ring: ring index to start the fence driver on
  735. *
  736. * Init the fence driver for the requested ring (all asics).
  737. * Helper function for radeon_fence_driver_init().
  738. */
  739. static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
  740. {
  741. int i;
  742. rdev->fence_drv[ring].scratch_reg = -1;
  743. rdev->fence_drv[ring].cpu_addr = NULL;
  744. rdev->fence_drv[ring].gpu_addr = 0;
  745. for (i = 0; i < RADEON_NUM_RINGS; ++i)
  746. rdev->fence_drv[ring].sync_seq[i] = 0;
  747. atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
  748. rdev->fence_drv[ring].last_activity = jiffies;
  749. rdev->fence_drv[ring].initialized = false;
  750. }
  751. /**
  752. * radeon_fence_driver_init - init the fence driver
  753. * for all possible rings.
  754. *
  755. * @rdev: radeon device pointer
  756. *
  757. * Init the fence driver for all possible rings (all asics).
  758. * Not all asics have all rings, so each asic will only
  759. * start the fence driver on the rings it has using
  760. * radeon_fence_driver_start_ring().
  761. * Returns 0 for success.
  762. */
  763. int radeon_fence_driver_init(struct radeon_device *rdev)
  764. {
  765. int ring;
  766. init_waitqueue_head(&rdev->fence_queue);
  767. for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  768. radeon_fence_driver_init_ring(rdev, ring);
  769. }
  770. if (radeon_debugfs_fence_init(rdev)) {
  771. dev_err(rdev->dev, "fence debugfs file creation failed\n");
  772. }
  773. return 0;
  774. }
  775. /**
  776. * radeon_fence_driver_fini - tear down the fence driver
  777. * for all possible rings.
  778. *
  779. * @rdev: radeon device pointer
  780. *
  781. * Tear down the fence driver for all possible rings (all asics).
  782. */
  783. void radeon_fence_driver_fini(struct radeon_device *rdev)
  784. {
  785. int ring;
  786. mutex_lock(&rdev->ring_lock);
  787. for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  788. if (!rdev->fence_drv[ring].initialized)
  789. continue;
  790. radeon_fence_wait_empty_locked(rdev, ring);
  791. wake_up_all(&rdev->fence_queue);
  792. radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
  793. rdev->fence_drv[ring].initialized = false;
  794. }
  795. mutex_unlock(&rdev->ring_lock);
  796. }
  797. /*
  798. * Fence debugfs
  799. */
  #if defined(CONFIG_DEBUG_FS)
  /*
   * seq_file show callback: for every initialized ring, print the last
   * signaled and last emitted sequence numbers, followed by the sync
   * sequence number recorded for each other initialized ring.
   * Always returns 0.
   */
  static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
  {
      struct drm_info_node *node = (struct drm_info_node *)m->private;
      struct radeon_device *rdev = node->minor->dev->dev_private;
      int ring, other;

      for (ring = 0; ring < RADEON_NUM_RINGS; ++ring) {
          struct radeon_fence_driver *fdrv = &rdev->fence_drv[ring];

          if (!fdrv->initialized)
              continue;

          seq_printf(m, "--- ring %d ---\n", ring);
          seq_printf(m, "Last signaled fence 0x%016llx\n",
                     (unsigned long long)atomic64_read(&fdrv->last_seq));
          seq_printf(m, "Last emitted 0x%016llx\n",
                     fdrv->sync_seq[ring]);

          for (other = 0; other < RADEON_NUM_RINGS; ++other) {
              if (other == ring || !rdev->fence_drv[other].initialized)
                  continue;
              seq_printf(m, "Last sync to ring %d 0x%016llx\n",
                         other, fdrv->sync_seq[other]);
          }
      }
      return 0;
  }

  /* debugfs entries registered by radeon_debugfs_fence_init() */
  static struct drm_info_list radeon_debugfs_fence_list[] = {
      {"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
  };
  #endif
  /**
   * radeon_debugfs_fence_init - register the fence debugfs entries
   *
   * @rdev: radeon device pointer
   *
   * With CONFIG_DEBUG_FS enabled, registers every entry of
   * radeon_debugfs_fence_list and returns the result of
   * radeon_debugfs_add_files(). Without it, does nothing and
   * returns 0.
   */
  int radeon_debugfs_fence_init(struct radeon_device *rdev)
  {
  #if defined(CONFIG_DEBUG_FS)
      /* derive the count from the table so new entries are picked up */
      return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list,
                                      ARRAY_SIZE(radeon_debugfs_fence_list));
  #else
      return 0;
  #endif
  }