cik_sdma.c

/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "cikd.h"

/* sdma */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64

u32 cik_gpu_check_soft_reset(struct radeon_device *rdev);

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines.  These engines are used for compute
 * and gfx.  There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP.  sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things.  It also has support for tiling/detiling of
 * buffers.
 */
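
/*
 * For orientation: every sDMA command in this file is built with the
 * SDMA_PACKET(opcode, sub_opcode, extra_bits) helper from cikd.h, followed
 * by its payload dwords.  As an illustrative example (this is exactly what
 * cik_sdma_ring_test() below emits), a linear WRITE of one dword is:
 *
 *	SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 *	dst_addr_lo	(dword aligned)
 *	dst_addr_hi
 *	1		(number of dwords to follow)
 *	data
 *
 * See cikd.h for the authoritative bit packing of the packet header.
 */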
/**
 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (CIK).
 */
void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
			      struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;

	if (rdev->wb.enabled) {
		u32 next_rptr = ring->wptr + 5;
		while ((next_rptr & 7) != 4)
			next_rptr++;
		next_rptr += 4;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
		radeon_ring_write(ring, 1); /* number of DWs to follow */
		radeon_ring_write(ring, next_rptr);
	}

	/* IB packet must end on a 8 DW boundary */
	while ((ring->wptr & 7) != 4)
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, ib->length_dw);
}
/**
 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write the fence seq number,
 * and a DMA trap packet to generate an interrupt if needed (CIK).
 */
void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
			      struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* write the fence */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	radeon_ring_write(ring, fence->seq);
	/* generate an interrupt */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
	/* flush HDP */
	/* We should be using the new POLL_REG_MEM special op packet here
	 * but it causes sDMA to hang sometimes
	 */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
}
/**
 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (CIK).
 */
bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
				  struct radeon_ring *ring,
				  struct radeon_semaphore *semaphore,
				  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
	radeon_ring_write(ring, addr & 0xfffffff8);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);

	return true;
}
/**
 * cik_sdma_gfx_stop - stop the gfx async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx async dma ring buffers (CIK).
 */
static void cik_sdma_gfx_stop(struct radeon_device *rdev)
{
	u32 rb_cntl, reg_offset;
	int i;

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
		rb_cntl &= ~SDMA_RB_ENABLE;
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
	}
}
/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}
/**
 * cik_sdma_enable - enable/disable the async dma engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (CIK).
 */
void cik_sdma_enable(struct radeon_device *rdev, bool enable)
{
	u32 me_cntl, reg_offset;
	int i;

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
		if (enable)
			me_cntl &= ~SDMA_HALT;
		else
			me_cntl |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
	}
}
/**
 * cik_sdma_gfx_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = SDMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = SDMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
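		/* Illustrative arithmetic: a 256 KB ring holds 65536 dwords,
		 * so rb_bufsz = order_base_2(65536) = 16; that log2 value is
		 * what gets shifted into rb_cntl above.
		 */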
#ifdef __BIG_ENDIAN
		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;

		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;

		/* enable DMA RB */
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

		ib_cntl = SDMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
		ib_cntl |= SDMA_IB_SWAP_ENABLE;
#endif
		/* enable DMA IBs */
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}
/**
 * cik_sdma_load_microcode - load the sDMA ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_sdma_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->sdma_fw)
		return -EINVAL;

	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);

	/* halt the MEs */
	cik_sdma_enable(rdev, false);

	/* sdma0 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	/* sdma1 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	return 0;
}
/**
 * cik_sdma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA engines and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
int cik_sdma_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
	RREG32(SRBM_SOFT_RESET);
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);
	RREG32(SRBM_SOFT_RESET);

	r = cik_sdma_load_microcode(rdev);
	if (r)
		return r;

	/* unhalt the MEs */
	cik_sdma_enable(rdev, true);

	/* start the gfx rings and rlc compute queues */
	r = cik_sdma_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_sdma_rlc_resume(rdev);
	if (r)
		return r;

	return 0;
}
/**
 * cik_sdma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (CIK).
 */
void cik_sdma_fini(struct radeon_device *rdev)
{
	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);

	/* halt the MEs */
	cik_sdma_enable(rdev, false);

	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
	/* XXX - compute dma queue tear down */
}
/**
 * cik_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU paging using the DMA engine (CIK).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int cik_copy_dma(struct radeon_device *rdev,
		 uint64_t src_offset, uint64_t dst_offset,
		 unsigned num_gpu_pages,
		 struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
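	/* Each linear COPY packet emitted in the loop below is 7 dwords and
	 * moves at most 0x1fffff bytes; the 14 extra dwords reserved below
	 * leave headroom for the semaphore sync and the fence/trap/HDP-flush
	 * writes emitted before the copy is committed.
	 */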
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	radeon_semaphore_sync_to(sem, *fence);
	radeon_semaphore_sync_rings(rdev, sem, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, cur_size_in_bytes);
		radeon_ring_write(ring, 0); /* src/dst endian swap */
		radeon_ring_write(ring, src_offset & 0xffffffff);
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
		radeon_ring_write(ring, dst_offset & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}
/**
 * cik_sdma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (CIK).
 * Returns 0 for success, error for failure.
 */
int cik_sdma_ring_test(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ring_lock(rdev, ring, 4);
	if (r) {
		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
		return r;
	}
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}
/**
 * cik_sdma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
 */
int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp = 0;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
	ib.ptr[3] = 1;
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
	radeon_ib_free(rdev, &ib);
	return r;
}
/**
 * cik_sdma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up (CIK).
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force ring activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}
/**
 * cik_sdma_vm_set_page - update the page tables using sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (CIK).
 */
void cik_sdma_vm_set_page(struct radeon_device *rdev,
			  struct radeon_ib *ib,
			  uint64_t pe,
			  uint64_t addr, unsigned count,
			  uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	trace_radeon_vm_set_page(pe, addr, count, incr, flags);

	if (flags & R600_PTE_SYSTEM) {
		while (count) {
			ndw = count * 2;
			if (ndw > 0xFFFFE)
				ndw = 0xFFFFE;

			/* for non-physically contiguous pages (system) */
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			ib->ptr[ib->length_dw++] = ndw;
			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
				value = radeon_vm_map_gart(rdev, addr);
				value &= 0xFFFFFFFFFFFFF000ULL;
				addr += incr;
				value |= flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		while (count) {
			ndw = count;
			if (ndw > 0x7FFFF)
				ndw = 0x7FFFF;

			if (flags & R600_PTE_VALID)
				value = addr;
			else
				value = 0;

			/* for physically contiguous pages (vram) */
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
			ib->ptr[ib->length_dw++] = pe; /* dst addr */
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			ib->ptr[ib->length_dw++] = flags; /* mask */
			ib->ptr[ib->length_dw++] = 0;
			ib->ptr[ib->length_dw++] = value; /* value */
			ib->ptr[ib->length_dw++] = upper_32_bits(value);
			ib->ptr[ib->length_dw++] = incr; /* increment size */
			ib->ptr[ib->length_dw++] = 0;
			ib->ptr[ib->length_dw++] = ndw; /* number of entries */

			pe += ndw * 8;
			addr += ndw * incr;
			count -= ndw;
		}
	}
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
}
/**
 * cik_dma_vm_flush - cik vm flush using sDMA
 *
 * @rdev: radeon_device pointer
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (CIK).
 */
void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	if (vm->id < 8) {
		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
	radeon_ring_write(ring, 1);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(0));

	/* flush HDP */
	/* We should be using the new POLL_REG_MEM special op packet here
	 * but it causes sDMA to hang sometimes
	 */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);

	/* flush TLB */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 1 << vm->id);
}