radeon_fence.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949
  1. /*
  2. * Copyright 2009 Jerome Glisse.
  3. * All Rights Reserved.
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the
  7. * "Software"), to deal in the Software without restriction, including
  8. * without limitation the rights to use, copy, modify, merge, publish,
  9. * distribute, sub license, and/or sell copies of the Software, and to
  10. * permit persons to whom the Software is furnished to do so, subject to
  11. * the following conditions:
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16. * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17. * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18. * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19. * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20. *
  21. * The above copyright notice and this permission notice (including the
  22. * next paragraph) shall be included in all copies or substantial portions
  23. * of the Software.
  24. *
  25. */
  26. /*
  27. * Authors:
  28. * Jerome Glisse <glisse@freedesktop.org>
  29. * Dave Airlie
  30. */
  31. #include <linux/seq_file.h>
  32. #include <linux/atomic.h>
  33. #include <linux/wait.h>
  34. #include <linux/kref.h>
  35. #include <linux/slab.h>
  36. #include <linux/firmware.h>
  37. #include <drm/drmP.h>
  38. #include "radeon_reg.h"
  39. #include "radeon.h"
  40. #include "radeon_trace.h"
  41. /*
  42. * Fences
  43. * Fences mark an event in the GPUs pipeline and are used
  44. * for GPU/CPU synchronization. When the fence is written,
  45. * it is expected that all buffers associated with that fence
  46. * are no longer in use by the associated ring on the GPU and
  47. * that the relevant GPU caches have been flushed. Whether
  48. * we use a scratch register or memory location depends on the asic
  49. * and whether writeback is enabled.
  50. */
  51. /**
  52. * radeon_fence_write - write a fence value
  53. *
  54. * @rdev: radeon_device pointer
  55. * @seq: sequence number to write
  56. * @ring: ring index the fence is associated with
  57. *
  58. * Writes a fence value to memory or a scratch register (all asics).
  59. */
  60. static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
  61. {
  62. struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  63. if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  64. if (drv->cpu_addr) {
  65. *drv->cpu_addr = cpu_to_le32(seq);
  66. }
  67. } else {
  68. WREG32(drv->scratch_reg, seq);
  69. }
  70. }
  71. /**
  72. * radeon_fence_read - read a fence value
  73. *
  74. * @rdev: radeon_device pointer
  75. * @ring: ring index the fence is associated with
  76. *
  77. * Reads a fence value from memory or a scratch register (all asics).
  78. * Returns the value of the fence read from memory or register.
  79. */
  80. static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
  81. {
  82. struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
  83. u32 seq = 0;
  84. if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
  85. if (drv->cpu_addr) {
  86. seq = le32_to_cpu(*drv->cpu_addr);
  87. } else {
  88. seq = lower_32_bits(atomic64_read(&drv->last_seq));
  89. }
  90. } else {
  91. seq = RREG32(drv->scratch_reg);
  92. }
  93. return seq;
  94. }
  95. /**
  96. * radeon_fence_emit - emit a fence on the requested ring
  97. *
  98. * @rdev: radeon_device pointer
  99. * @fence: radeon fence object
  100. * @ring: ring index the fence is associated with
  101. *
  102. * Emits a fence command on the requested ring (all asics).
  103. * Returns 0 on success, -ENOMEM on failure.
  104. */
  105. int radeon_fence_emit(struct radeon_device *rdev,
  106. struct radeon_fence **fence,
  107. int ring)
  108. {
  109. /* we are protected by the ring emission mutex */
  110. *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
  111. if ((*fence) == NULL) {
  112. return -ENOMEM;
  113. }
  114. kref_init(&((*fence)->kref));
  115. (*fence)->rdev = rdev;
  116. (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
  117. (*fence)->ring = ring;
  118. radeon_fence_ring_emit(rdev, ring, *fence);
  119. trace_radeon_fence_emit(rdev->ddev, (*fence)->seq);
  120. return 0;
  121. }
  122. /**
  123. * radeon_fence_process - process a fence
  124. *
  125. * @rdev: radeon_device pointer
  126. * @ring: ring index the fence is associated with
  127. *
  128. * Checks the current fence value and wakes the fence queue
  129. * if the sequence number has increased (all asics).
  130. */
  131. void radeon_fence_process(struct radeon_device *rdev, int ring)
  132. {
  133. uint64_t seq, last_seq, last_emitted;
  134. unsigned count_loop = 0;
  135. bool wake = false;
  136. /* Note there is a scenario here for an infinite loop but it's
  137. * very unlikely to happen. For it to happen, the current polling
  138. * process need to be interrupted by another process and another
  139. * process needs to update the last_seq btw the atomic read and
  140. * xchg of the current process.
  141. *
  142. * More over for this to go in infinite loop there need to be
  143. * continuously new fence signaled ie radeon_fence_read needs
  144. * to return a different value each time for both the currently
  145. * polling process and the other process that xchg the last_seq
  146. * btw atomic read and xchg of the current process. And the
  147. * value the other process set as last seq must be higher than
  148. * the seq value we just read. Which means that current process
  149. * need to be interrupted after radeon_fence_read and before
  150. * atomic xchg.
  151. *
  152. * To be even more safe we count the number of time we loop and
  153. * we bail after 10 loop just accepting the fact that we might
  154. * have temporarly set the last_seq not to the true real last
  155. * seq but to an older one.
  156. */
  157. last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
  158. do {
  159. last_emitted = rdev->fence_drv[ring].sync_seq[ring];
  160. seq = radeon_fence_read(rdev, ring);
  161. seq |= last_seq & 0xffffffff00000000LL;
  162. if (seq < last_seq) {
  163. seq &= 0xffffffff;
  164. seq |= last_emitted & 0xffffffff00000000LL;
  165. }
  166. if (seq <= last_seq || seq > last_emitted) {
  167. break;
  168. }
  169. /* If we loop over we don't want to return without
  170. * checking if a fence is signaled as it means that the
  171. * seq we just read is different from the previous on.
  172. */
  173. wake = true;
  174. last_seq = seq;
  175. if ((count_loop++) > 10) {
  176. /* We looped over too many time leave with the
  177. * fact that we might have set an older fence
  178. * seq then the current real last seq as signaled
  179. * by the hw.
  180. */
  181. break;
  182. }
  183. } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
  184. if (wake) {
  185. rdev->fence_drv[ring].last_activity = jiffies;
  186. wake_up_all(&rdev->fence_queue);
  187. }
  188. }
  189. /**
  190. * radeon_fence_destroy - destroy a fence
  191. *
  192. * @kref: fence kref
  193. *
  194. * Frees the fence object (all asics).
  195. */
  196. static void radeon_fence_destroy(struct kref *kref)
  197. {
  198. struct radeon_fence *fence;
  199. fence = container_of(kref, struct radeon_fence, kref);
  200. kfree(fence);
  201. }
  202. /**
  203. * radeon_fence_seq_signaled - check if a fence sequeuce number has signaled
  204. *
  205. * @rdev: radeon device pointer
  206. * @seq: sequence number
  207. * @ring: ring index the fence is associated with
  208. *
  209. * Check if the last singled fence sequnce number is >= the requested
  210. * sequence number (all asics).
  211. * Returns true if the fence has signaled (current fence value
  212. * is >= requested value) or false if it has not (current fence
  213. * value is < the requested value. Helper function for
  214. * radeon_fence_signaled().
  215. */
  216. static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
  217. u64 seq, unsigned ring)
  218. {
  219. if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  220. return true;
  221. }
  222. /* poll new last sequence at least once */
  223. radeon_fence_process(rdev, ring);
  224. if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
  225. return true;
  226. }
  227. return false;
  228. }
  229. /**
  230. * radeon_fence_signaled - check if a fence has signaled
  231. *
  232. * @fence: radeon fence object
  233. *
  234. * Check if the requested fence has signaled (all asics).
  235. * Returns true if the fence has signaled or false if it has not.
  236. */
  237. bool radeon_fence_signaled(struct radeon_fence *fence)
  238. {
  239. if (!fence) {
  240. return true;
  241. }
  242. if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
  243. return true;
  244. }
  245. if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
  246. fence->seq = RADEON_FENCE_SIGNALED_SEQ;
  247. return true;
  248. }
  249. return false;
  250. }
  251. /**
  252. * radeon_fence_wait_seq - wait for a specific sequence number
  253. *
  254. * @rdev: radeon device pointer
  255. * @target_seq: sequence number we want to wait for
  256. * @ring: ring index the fence is associated with
  257. * @intr: use interruptable sleep
  258. * @lock_ring: whether the ring should be locked or not
  259. *
  260. * Wait for the requested sequence number to be written (all asics).
  261. * @intr selects whether to use interruptable (true) or non-interruptable
  262. * (false) sleep when waiting for the sequence number. Helper function
  263. * for radeon_fence_wait(), et al.
  264. * Returns 0 if the sequence number has passed, error for all other cases.
  265. * -EDEADLK is returned when a GPU lockup has been detected and the ring is
  266. * marked as not ready so no further jobs get scheduled until a successful
  267. * reset.
  268. */
  269. static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
  270. unsigned ring, bool intr, bool lock_ring)
  271. {
  272. unsigned long timeout, last_activity;
  273. uint64_t seq;
  274. unsigned i;
  275. bool signaled;
  276. int r;
  277. while (target_seq > atomic64_read(&rdev->fence_drv[ring].last_seq)) {
  278. if (!rdev->ring[ring].ready) {
  279. return -EBUSY;
  280. }
  281. timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
  282. if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
  283. /* the normal case, timeout is somewhere before last_activity */
  284. timeout = rdev->fence_drv[ring].last_activity - timeout;
  285. } else {
  286. /* either jiffies wrapped around, or no fence was signaled in the last 500ms
  287. * anyway we will just wait for the minimum amount and then check for a lockup
  288. */
  289. timeout = 1;
  290. }
  291. seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
  292. /* Save current last activity valuee, used to check for GPU lockups */
  293. last_activity = rdev->fence_drv[ring].last_activity;
  294. trace_radeon_fence_wait_begin(rdev->ddev, seq);
  295. radeon_irq_kms_sw_irq_get(rdev, ring);
  296. if (intr) {
  297. r = wait_event_interruptible_timeout(rdev->fence_queue,
  298. (signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
  299. timeout);
  300. } else {
  301. r = wait_event_timeout(rdev->fence_queue,
  302. (signaled = radeon_fence_seq_signaled(rdev, target_seq, ring)),
  303. timeout);
  304. }
  305. radeon_irq_kms_sw_irq_put(rdev, ring);
  306. if (unlikely(r < 0)) {
  307. return r;
  308. }
  309. trace_radeon_fence_wait_end(rdev->ddev, seq);
  310. if (unlikely(!signaled)) {
  311. /* we were interrupted for some reason and fence
  312. * isn't signaled yet, resume waiting */
  313. if (r) {
  314. continue;
  315. }
  316. /* check if sequence value has changed since last_activity */
  317. if (seq != atomic64_read(&rdev->fence_drv[ring].last_seq)) {
  318. continue;
  319. }
  320. if (lock_ring) {
  321. mutex_lock(&rdev->ring_lock);
  322. }
  323. /* test if somebody else has already decided that this is a lockup */
  324. if (last_activity != rdev->fence_drv[ring].last_activity) {
  325. if (lock_ring) {
  326. mutex_unlock(&rdev->ring_lock);
  327. }
  328. continue;
  329. }
  330. if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
  331. /* good news we believe it's a lockup */
  332. dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
  333. target_seq, seq);
  334. /* change last activity so nobody else think there is a lockup */
  335. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  336. rdev->fence_drv[i].last_activity = jiffies;
  337. }
  338. /* mark the ring as not ready any more */
  339. rdev->ring[ring].ready = false;
  340. if (lock_ring) {
  341. mutex_unlock(&rdev->ring_lock);
  342. }
  343. return -EDEADLK;
  344. }
  345. if (lock_ring) {
  346. mutex_unlock(&rdev->ring_lock);
  347. }
  348. }
  349. }
  350. return 0;
  351. }
  352. /**
  353. * radeon_fence_wait - wait for a fence to signal
  354. *
  355. * @fence: radeon fence object
  356. * @intr: use interruptable sleep
  357. *
  358. * Wait for the requested fence to signal (all asics).
  359. * @intr selects whether to use interruptable (true) or non-interruptable
  360. * (false) sleep when waiting for the fence.
  361. * Returns 0 if the fence has passed, error for all other cases.
  362. */
  363. int radeon_fence_wait(struct radeon_fence *fence, bool intr)
  364. {
  365. int r;
  366. if (fence == NULL) {
  367. WARN(1, "Querying an invalid fence : %p !\n", fence);
  368. return -EINVAL;
  369. }
  370. r = radeon_fence_wait_seq(fence->rdev, fence->seq,
  371. fence->ring, intr, true);
  372. if (r) {
  373. return r;
  374. }
  375. fence->seq = RADEON_FENCE_SIGNALED_SEQ;
  376. return 0;
  377. }
  378. static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
  379. {
  380. unsigned i;
  381. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  382. if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i)) {
  383. return true;
  384. }
  385. }
  386. return false;
  387. }
  388. /**
  389. * radeon_fence_wait_any_seq - wait for a sequence number on any ring
  390. *
  391. * @rdev: radeon device pointer
  392. * @target_seq: sequence number(s) we want to wait for
  393. * @intr: use interruptable sleep
  394. *
  395. * Wait for the requested sequence number(s) to be written by any ring
  396. * (all asics). Sequnce number array is indexed by ring id.
  397. * @intr selects whether to use interruptable (true) or non-interruptable
  398. * (false) sleep when waiting for the sequence number. Helper function
  399. * for radeon_fence_wait_any(), et al.
  400. * Returns 0 if the sequence number has passed, error for all other cases.
  401. */
  402. static int radeon_fence_wait_any_seq(struct radeon_device *rdev,
  403. u64 *target_seq, bool intr)
  404. {
  405. unsigned long timeout, last_activity, tmp;
  406. unsigned i, ring = RADEON_NUM_RINGS;
  407. bool signaled;
  408. int r;
  409. for (i = 0, last_activity = 0; i < RADEON_NUM_RINGS; ++i) {
  410. if (!target_seq[i]) {
  411. continue;
  412. }
  413. /* use the most recent one as indicator */
  414. if (time_after(rdev->fence_drv[i].last_activity, last_activity)) {
  415. last_activity = rdev->fence_drv[i].last_activity;
  416. }
  417. /* For lockup detection just pick the lowest ring we are
  418. * actively waiting for
  419. */
  420. if (i < ring) {
  421. ring = i;
  422. }
  423. }
  424. /* nothing to wait for ? */
  425. if (ring == RADEON_NUM_RINGS) {
  426. return -ENOENT;
  427. }
  428. while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
  429. timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
  430. if (time_after(last_activity, timeout)) {
  431. /* the normal case, timeout is somewhere before last_activity */
  432. timeout = last_activity - timeout;
  433. } else {
  434. /* either jiffies wrapped around, or no fence was signaled in the last 500ms
  435. * anyway we will just wait for the minimum amount and then check for a lockup
  436. */
  437. timeout = 1;
  438. }
  439. trace_radeon_fence_wait_begin(rdev->ddev, target_seq[ring]);
  440. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  441. if (target_seq[i]) {
  442. radeon_irq_kms_sw_irq_get(rdev, i);
  443. }
  444. }
  445. if (intr) {
  446. r = wait_event_interruptible_timeout(rdev->fence_queue,
  447. (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
  448. timeout);
  449. } else {
  450. r = wait_event_timeout(rdev->fence_queue,
  451. (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)),
  452. timeout);
  453. }
  454. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  455. if (target_seq[i]) {
  456. radeon_irq_kms_sw_irq_put(rdev, i);
  457. }
  458. }
  459. if (unlikely(r < 0)) {
  460. return r;
  461. }
  462. trace_radeon_fence_wait_end(rdev->ddev, target_seq[ring]);
  463. if (unlikely(!signaled)) {
  464. /* we were interrupted for some reason and fence
  465. * isn't signaled yet, resume waiting */
  466. if (r) {
  467. continue;
  468. }
  469. mutex_lock(&rdev->ring_lock);
  470. for (i = 0, tmp = 0; i < RADEON_NUM_RINGS; ++i) {
  471. if (time_after(rdev->fence_drv[i].last_activity, tmp)) {
  472. tmp = rdev->fence_drv[i].last_activity;
  473. }
  474. }
  475. /* test if somebody else has already decided that this is a lockup */
  476. if (last_activity != tmp) {
  477. last_activity = tmp;
  478. mutex_unlock(&rdev->ring_lock);
  479. continue;
  480. }
  481. if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
  482. /* good news we believe it's a lockup */
  483. dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx)\n",
  484. target_seq[ring]);
  485. /* change last activity so nobody else think there is a lockup */
  486. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  487. rdev->fence_drv[i].last_activity = jiffies;
  488. }
  489. /* mark the ring as not ready any more */
  490. rdev->ring[ring].ready = false;
  491. mutex_unlock(&rdev->ring_lock);
  492. return -EDEADLK;
  493. }
  494. mutex_unlock(&rdev->ring_lock);
  495. }
  496. }
  497. return 0;
  498. }
  499. /**
  500. * radeon_fence_wait_any - wait for a fence to signal on any ring
  501. *
  502. * @rdev: radeon device pointer
  503. * @fences: radeon fence object(s)
  504. * @intr: use interruptable sleep
  505. *
  506. * Wait for any requested fence to signal (all asics). Fence
  507. * array is indexed by ring id. @intr selects whether to use
  508. * interruptable (true) or non-interruptable (false) sleep when
  509. * waiting for the fences. Used by the suballocator.
  510. * Returns 0 if any fence has passed, error for all other cases.
  511. */
  512. int radeon_fence_wait_any(struct radeon_device *rdev,
  513. struct radeon_fence **fences,
  514. bool intr)
  515. {
  516. uint64_t seq[RADEON_NUM_RINGS];
  517. unsigned i;
  518. int r;
  519. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  520. seq[i] = 0;
  521. if (!fences[i]) {
  522. continue;
  523. }
  524. if (fences[i]->seq == RADEON_FENCE_SIGNALED_SEQ) {
  525. /* something was allready signaled */
  526. return 0;
  527. }
  528. seq[i] = fences[i]->seq;
  529. }
  530. r = radeon_fence_wait_any_seq(rdev, seq, intr);
  531. if (r) {
  532. return r;
  533. }
  534. return 0;
  535. }
  536. /**
  537. * radeon_fence_wait_next_locked - wait for the next fence to signal
  538. *
  539. * @rdev: radeon device pointer
  540. * @ring: ring index the fence is associated with
  541. *
  542. * Wait for the next fence on the requested ring to signal (all asics).
  543. * Returns 0 if the next fence has passed, error for all other cases.
  544. * Caller must hold ring lock.
  545. */
  546. int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
  547. {
  548. uint64_t seq;
  549. seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
  550. if (seq >= rdev->fence_drv[ring].sync_seq[ring]) {
  551. /* nothing to wait for, last_seq is
  552. already the last emited fence */
  553. return -ENOENT;
  554. }
  555. return radeon_fence_wait_seq(rdev, seq, ring, false, false);
  556. }
  557. /**
  558. * radeon_fence_wait_empty_locked - wait for all fences to signal
  559. *
  560. * @rdev: radeon device pointer
  561. * @ring: ring index the fence is associated with
  562. *
  563. * Wait for all fences on the requested ring to signal (all asics).
  564. * Returns 0 if the fences have passed, error for all other cases.
  565. * Caller must hold ring lock.
  566. */
  567. int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
  568. {
  569. uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
  570. int r;
  571. r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
  572. if (r) {
  573. if (r == -EDEADLK) {
  574. return -EDEADLK;
  575. }
  576. dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
  577. ring, r);
  578. }
  579. return 0;
  580. }
  581. /**
  582. * radeon_fence_ref - take a ref on a fence
  583. *
  584. * @fence: radeon fence object
  585. *
  586. * Take a reference on a fence (all asics).
  587. * Returns the fence.
  588. */
  589. struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
  590. {
  591. kref_get(&fence->kref);
  592. return fence;
  593. }
  594. /**
  595. * radeon_fence_unref - remove a ref on a fence
  596. *
  597. * @fence: radeon fence object
  598. *
  599. * Remove a reference on a fence (all asics).
  600. */
  601. void radeon_fence_unref(struct radeon_fence **fence)
  602. {
  603. struct radeon_fence *tmp = *fence;
  604. *fence = NULL;
  605. if (tmp) {
  606. kref_put(&tmp->kref, radeon_fence_destroy);
  607. }
  608. }
  609. /**
  610. * radeon_fence_count_emitted - get the count of emitted fences
  611. *
  612. * @rdev: radeon device pointer
  613. * @ring: ring index the fence is associated with
  614. *
  615. * Get the number of fences emitted on the requested ring (all asics).
  616. * Returns the number of emitted fences on the ring. Used by the
  617. * dynpm code to ring track activity.
  618. */
  619. unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
  620. {
  621. uint64_t emitted;
  622. /* We are not protected by ring lock when reading the last sequence
  623. * but it's ok to report slightly wrong fence count here.
  624. */
  625. radeon_fence_process(rdev, ring);
  626. emitted = rdev->fence_drv[ring].sync_seq[ring]
  627. - atomic64_read(&rdev->fence_drv[ring].last_seq);
  628. /* to avoid 32bits warp around */
  629. if (emitted > 0x10000000) {
  630. emitted = 0x10000000;
  631. }
  632. return (unsigned)emitted;
  633. }
  634. /**
  635. * radeon_fence_need_sync - do we need a semaphore
  636. *
  637. * @fence: radeon fence object
  638. * @dst_ring: which ring to check against
  639. *
  640. * Check if the fence needs to be synced against another ring
  641. * (all asics). If so, we need to emit a semaphore.
  642. * Returns true if we need to sync with another ring, false if
  643. * not.
  644. */
  645. bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
  646. {
  647. struct radeon_fence_driver *fdrv;
  648. if (!fence) {
  649. return false;
  650. }
  651. if (fence->ring == dst_ring) {
  652. return false;
  653. }
  654. /* we are protected by the ring mutex */
  655. fdrv = &fence->rdev->fence_drv[dst_ring];
  656. if (fence->seq <= fdrv->sync_seq[fence->ring]) {
  657. return false;
  658. }
  659. return true;
  660. }
  661. /**
  662. * radeon_fence_note_sync - record the sync point
  663. *
  664. * @fence: radeon fence object
  665. * @dst_ring: which ring to check against
  666. *
  667. * Note the sequence number at which point the fence will
  668. * be synced with the requested ring (all asics).
  669. */
  670. void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
  671. {
  672. struct radeon_fence_driver *dst, *src;
  673. unsigned i;
  674. if (!fence) {
  675. return;
  676. }
  677. if (fence->ring == dst_ring) {
  678. return;
  679. }
  680. /* we are protected by the ring mutex */
  681. src = &fence->rdev->fence_drv[fence->ring];
  682. dst = &fence->rdev->fence_drv[dst_ring];
  683. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  684. if (i == dst_ring) {
  685. continue;
  686. }
  687. dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
  688. }
  689. }
  690. /**
  691. * radeon_fence_driver_start_ring - make the fence driver
  692. * ready for use on the requested ring.
  693. *
  694. * @rdev: radeon device pointer
  695. * @ring: ring index to start the fence driver on
  696. *
  697. * Make the fence driver ready for processing (all asics).
  698. * Not all asics have all rings, so each asic will only
  699. * start the fence driver on the rings it has.
  700. * Returns 0 for success, errors for failure.
  701. */
  702. int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
  703. {
  704. uint64_t index;
  705. int r;
  706. radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
  707. if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
  708. rdev->fence_drv[ring].scratch_reg = 0;
  709. if (ring != R600_RING_TYPE_UVD_INDEX) {
  710. index = R600_WB_EVENT_OFFSET + ring * 4;
  711. rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
  712. rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
  713. index;
  714. } else {
  715. /* put fence directly behind firmware */
  716. index = ALIGN(rdev->uvd.fw_size, 8);
  717. rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
  718. rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
  719. }
  720. } else {
  721. r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
  722. if (r) {
  723. dev_err(rdev->dev, "fence failed to get scratch register\n");
  724. return r;
  725. }
  726. index = RADEON_WB_SCRATCH_OFFSET +
  727. rdev->fence_drv[ring].scratch_reg -
  728. rdev->scratch.reg_base;
  729. rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
  730. rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
  731. }
  732. radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
  733. rdev->fence_drv[ring].initialized = true;
  734. dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
  735. ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
  736. return 0;
  737. }
  738. /**
  739. * radeon_fence_driver_init_ring - init the fence driver
  740. * for the requested ring.
  741. *
  742. * @rdev: radeon device pointer
  743. * @ring: ring index to start the fence driver on
  744. *
  745. * Init the fence driver for the requested ring (all asics).
  746. * Helper function for radeon_fence_driver_init().
  747. */
  748. static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
  749. {
  750. int i;
  751. rdev->fence_drv[ring].scratch_reg = -1;
  752. rdev->fence_drv[ring].cpu_addr = NULL;
  753. rdev->fence_drv[ring].gpu_addr = 0;
  754. for (i = 0; i < RADEON_NUM_RINGS; ++i)
  755. rdev->fence_drv[ring].sync_seq[i] = 0;
  756. atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
  757. rdev->fence_drv[ring].last_activity = jiffies;
  758. rdev->fence_drv[ring].initialized = false;
  759. }
  760. /**
  761. * radeon_fence_driver_init - init the fence driver
  762. * for all possible rings.
  763. *
  764. * @rdev: radeon device pointer
  765. *
  766. * Init the fence driver for all possible rings (all asics).
  767. * Not all asics have all rings, so each asic will only
  768. * start the fence driver on the rings it has using
  769. * radeon_fence_driver_start_ring().
  770. * Returns 0 for success.
  771. */
  772. int radeon_fence_driver_init(struct radeon_device *rdev)
  773. {
  774. int ring;
  775. init_waitqueue_head(&rdev->fence_queue);
  776. for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  777. radeon_fence_driver_init_ring(rdev, ring);
  778. }
  779. if (radeon_debugfs_fence_init(rdev)) {
  780. dev_err(rdev->dev, "fence debugfs file creation failed\n");
  781. }
  782. return 0;
  783. }
  784. /**
  785. * radeon_fence_driver_fini - tear down the fence driver
  786. * for all possible rings.
  787. *
  788. * @rdev: radeon device pointer
  789. *
  790. * Tear down the fence driver for all possible rings (all asics).
  791. */
  792. void radeon_fence_driver_fini(struct radeon_device *rdev)
  793. {
  794. int ring, r;
  795. mutex_lock(&rdev->ring_lock);
  796. for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  797. if (!rdev->fence_drv[ring].initialized)
  798. continue;
  799. r = radeon_fence_wait_empty_locked(rdev, ring);
  800. if (r) {
  801. /* no need to trigger GPU reset as we are unloading */
  802. radeon_fence_driver_force_completion(rdev);
  803. }
  804. wake_up_all(&rdev->fence_queue);
  805. radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
  806. rdev->fence_drv[ring].initialized = false;
  807. }
  808. mutex_unlock(&rdev->ring_lock);
  809. }
  810. /**
  811. * radeon_fence_driver_force_completion - force all fence waiter to complete
  812. *
  813. * @rdev: radeon device pointer
  814. *
  815. * In case of GPU reset failure make sure no process keep waiting on fence
  816. * that will never complete.
  817. */
  818. void radeon_fence_driver_force_completion(struct radeon_device *rdev)
  819. {
  820. int ring;
  821. for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
  822. if (!rdev->fence_drv[ring].initialized)
  823. continue;
  824. radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
  825. }
  826. }
  827. /*
  828. * Fence debugfs
  829. */
  830. #if defined(CONFIG_DEBUG_FS)
  831. static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
  832. {
  833. struct drm_info_node *node = (struct drm_info_node *)m->private;
  834. struct drm_device *dev = node->minor->dev;
  835. struct radeon_device *rdev = dev->dev_private;
  836. int i, j;
  837. for (i = 0; i < RADEON_NUM_RINGS; ++i) {
  838. if (!rdev->fence_drv[i].initialized)
  839. continue;
  840. seq_printf(m, "--- ring %d ---\n", i);
  841. seq_printf(m, "Last signaled fence 0x%016llx\n",
  842. (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
  843. seq_printf(m, "Last emitted 0x%016llx\n",
  844. rdev->fence_drv[i].sync_seq[i]);
  845. for (j = 0; j < RADEON_NUM_RINGS; ++j) {
  846. if (i != j && rdev->fence_drv[j].initialized)
  847. seq_printf(m, "Last sync to ring %d 0x%016llx\n",
  848. j, rdev->fence_drv[i].sync_seq[j]);
  849. }
  850. }
  851. return 0;
  852. }
  853. static struct drm_info_list radeon_debugfs_fence_list[] = {
  854. {"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
  855. };
  856. #endif
  /**
   * radeon_debugfs_fence_init - register the fence debugfs file
   *
   * @rdev: radeon device pointer
   *
   * Registers the one entry of radeon_debugfs_fence_list when
   * CONFIG_DEBUG_FS is defined; does nothing otherwise.
   * Returns the result of radeon_debugfs_add_files(), or 0 without debugfs.
   */
  int radeon_debugfs_fence_init(struct radeon_device *rdev)
  {
      int ret = 0;

  #if defined(CONFIG_DEBUG_FS)
      ret = radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 1);
  #endif
      return ret;
  }