r100.c 42 KB


  1. /*
  2. * Copyright 2008 Advanced Micro Devices, Inc.
  3. * Copyright 2008 Red Hat Inc.
  4. * Copyright 2009 Jerome Glisse.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a
  7. * copy of this software and associated documentation files (the "Software"),
  8. * to deal in the Software without restriction, including without limitation
  9. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10. * and/or sell copies of the Software, and to permit persons to whom the
  11. * Software is furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22. * OTHER DEALINGS IN THE SOFTWARE.
  23. *
  24. * Authors: Dave Airlie
  25. * Alex Deucher
  26. * Jerome Glisse
  27. */
  28. #include <linux/seq_file.h>
  29. #include "drmP.h"
  30. #include "drm.h"
  31. #include "radeon_drm.h"
  32. #include "radeon_microcode.h"
  33. #include "radeon_reg.h"
  34. #include "radeon.h"
  35. /* This file gathers functions specific to:
  36. * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
  37. *
  38. * Some of these functions might be used by newer ASICs.
  39. */
  40. void r100_hdp_reset(struct radeon_device *rdev);
  41. void r100_gpu_init(struct radeon_device *rdev);
  42. int r100_gui_wait_for_idle(struct radeon_device *rdev);
  43. int r100_mc_wait_for_idle(struct radeon_device *rdev);
  44. void r100_gpu_wait_for_vsync(struct radeon_device *rdev);
  45. void r100_gpu_wait_for_vsync2(struct radeon_device *rdev);
  46. int r100_debugfs_mc_info_init(struct radeon_device *rdev);
  47. /*
  48. * PCI GART
  49. */
  /*
   * r100_pci_gart_tlb_flush - PCI GART TLB flush hook (currently a no-op).
   * @rdev: radeon device (unused)
   */
  void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
  {
      /*
       * TODO: can something useful be done here?
       *
       * The hardware appears to cache a single entry only, so that entry
       * ought to be discarded: otherwise the first GPU GART read that hits
       * it could resolve to the wrong address.
       */
  }
  57. int r100_pci_gart_enable(struct radeon_device *rdev)
  58. {
  59. uint32_t tmp;
  60. int r;
  61. /* Initialize common gart structure */
  62. r = radeon_gart_init(rdev);
  63. if (r) {
  64. return r;
  65. }
  66. if (rdev->gart.table.ram.ptr == NULL) {
  67. rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
  68. r = radeon_gart_table_ram_alloc(rdev);
  69. if (r) {
  70. return r;
  71. }
  72. }
  73. /* discard memory request outside of configured range */
  74. tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
  75. WREG32(RADEON_AIC_CNTL, tmp);
  76. /* set address range for PCI address translate */
  77. WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location);
  78. tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
  79. WREG32(RADEON_AIC_HI_ADDR, tmp);
  80. /* Enable bus mastering */
  81. tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
  82. WREG32(RADEON_BUS_CNTL, tmp);
  83. /* set PCI GART page-table base address */
  84. WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
  85. tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
  86. WREG32(RADEON_AIC_CNTL, tmp);
  87. r100_pci_gart_tlb_flush(rdev);
  88. rdev->gart.ready = true;
  89. return 0;
  90. }
  91. void r100_pci_gart_disable(struct radeon_device *rdev)
  92. {
  93. uint32_t tmp;
  94. /* discard memory request outside of configured range */
  95. tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
  96. WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
  97. WREG32(RADEON_AIC_LO_ADDR, 0);
  98. WREG32(RADEON_AIC_HI_ADDR, 0);
  99. }
  100. int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
  101. {
  102. if (i < 0 || i > rdev->gart.num_gpu_pages) {
  103. return -EINVAL;
  104. }
  105. rdev->gart.table.ram.ptr[i] = cpu_to_le32((uint32_t)addr);
  106. return 0;
  107. }
  108. int r100_gart_enable(struct radeon_device *rdev)
  109. {
  110. if (rdev->flags & RADEON_IS_AGP) {
  111. r100_pci_gart_disable(rdev);
  112. return 0;
  113. }
  114. return r100_pci_gart_enable(rdev);
  115. }
  116. /*
  117. * MC
  118. */
  119. void r100_mc_disable_clients(struct radeon_device *rdev)
  120. {
  121. uint32_t ov0_scale_cntl, crtc_ext_cntl, crtc_gen_cntl, crtc2_gen_cntl;
  122. /* FIXME: is this function correct for rs100,rs200,rs300 ? */
  123. if (r100_gui_wait_for_idle(rdev)) {
  124. printk(KERN_WARNING "Failed to wait GUI idle while "
  125. "programming pipes. Bad things might happen.\n");
  126. }
  127. /* stop display and memory access */
  128. ov0_scale_cntl = RREG32(RADEON_OV0_SCALE_CNTL);
  129. WREG32(RADEON_OV0_SCALE_CNTL, ov0_scale_cntl & ~RADEON_SCALER_ENABLE);
  130. crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
  131. WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl | RADEON_CRTC_DISPLAY_DIS);
  132. crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
  133. r100_gpu_wait_for_vsync(rdev);
  134. WREG32(RADEON_CRTC_GEN_CNTL,
  135. (crtc_gen_cntl & ~(RADEON_CRTC_CUR_EN | RADEON_CRTC_ICON_EN)) |
  136. RADEON_CRTC_DISP_REQ_EN_B | RADEON_CRTC_EXT_DISP_EN);
  137. if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
  138. crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
  139. r100_gpu_wait_for_vsync2(rdev);
  140. WREG32(RADEON_CRTC2_GEN_CNTL,
  141. (crtc2_gen_cntl &
  142. ~(RADEON_CRTC2_CUR_EN | RADEON_CRTC2_ICON_EN)) |
  143. RADEON_CRTC2_DISP_REQ_EN_B);
  144. }
  145. udelay(500);
  146. }
  147. void r100_mc_setup(struct radeon_device *rdev)
  148. {
  149. uint32_t tmp;
  150. int r;
  151. r = r100_debugfs_mc_info_init(rdev);
  152. if (r) {
  153. DRM_ERROR("Failed to register debugfs file for R100 MC !\n");
  154. }
  155. /* Write VRAM size in case we are limiting it */
  156. WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
  157. tmp = rdev->mc.vram_location + rdev->mc.vram_size - 1;
  158. tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
  159. tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
  160. WREG32(RADEON_MC_FB_LOCATION, tmp);
  161. /* Enable bus mastering */
  162. tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
  163. WREG32(RADEON_BUS_CNTL, tmp);
  164. if (rdev->flags & RADEON_IS_AGP) {
  165. tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
  166. tmp = REG_SET(RADEON_MC_AGP_TOP, tmp >> 16);
  167. tmp |= REG_SET(RADEON_MC_AGP_START, rdev->mc.gtt_location >> 16);
  168. WREG32(RADEON_MC_AGP_LOCATION, tmp);
  169. WREG32(RADEON_AGP_BASE, rdev->mc.agp_base);
  170. } else {
  171. WREG32(RADEON_MC_AGP_LOCATION, 0x0FFFFFFF);
  172. WREG32(RADEON_AGP_BASE, 0);
  173. }
  174. tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
  175. tmp |= (7 << 28);
  176. WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
  177. (void)RREG32(RADEON_HOST_PATH_CNTL);
  178. WREG32(RADEON_HOST_PATH_CNTL, tmp);
  179. (void)RREG32(RADEON_HOST_PATH_CNTL);
  180. }
  181. int r100_mc_init(struct radeon_device *rdev)
  182. {
  183. int r;
  184. if (r100_debugfs_rbbm_init(rdev)) {
  185. DRM_ERROR("Failed to register debugfs file for RBBM !\n");
  186. }
  187. r100_gpu_init(rdev);
  188. /* Disable gart which also disable out of gart access */
  189. r100_pci_gart_disable(rdev);
  190. /* Setup GPU memory space */
  191. rdev->mc.vram_location = 0xFFFFFFFFUL;
  192. rdev->mc.gtt_location = 0xFFFFFFFFUL;
  193. if (rdev->flags & RADEON_IS_AGP) {
  194. r = radeon_agp_init(rdev);
  195. if (r) {
  196. printk(KERN_WARNING "[drm] Disabling AGP\n");
  197. rdev->flags &= ~RADEON_IS_AGP;
  198. rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
  199. } else {
  200. rdev->mc.gtt_location = rdev->mc.agp_base;
  201. }
  202. }
  203. r = radeon_mc_setup(rdev);
  204. if (r) {
  205. return r;
  206. }
  207. r100_mc_disable_clients(rdev);
  208. if (r100_mc_wait_for_idle(rdev)) {
  209. printk(KERN_WARNING "Failed to wait MC idle while "
  210. "programming pipes. Bad things might happen.\n");
  211. }
  212. r100_mc_setup(rdev);
  213. return 0;
  214. }
  /*
   * r100_mc_fini - tear down what the MC/GART setup created.
   * @rdev: radeon device
   *
   * Translation is disabled first, then the RAM page table is freed and the
   * common GART structure is finalized.
   */
  void r100_mc_fini(struct radeon_device *rdev)
  {
      r100_pci_gart_disable(rdev);

      radeon_gart_table_ram_free(rdev);
      radeon_gart_fini(rdev);
  }
  221. /*
  222. * Fence emission
  223. */
  224. void r100_fence_ring_emit(struct radeon_device *rdev,
  225. struct radeon_fence *fence)
  226. {
  227. /* Who ever call radeon_fence_emit should call ring_lock and ask
  228. * for enough space (today caller are ib schedule and buffer move) */
  229. /* Wait until IDLE & CLEAN */
  230. radeon_ring_write(rdev, PACKET0(0x1720, 0));
  231. radeon_ring_write(rdev, (1 << 16) | (1 << 17));
  232. /* Emit fence sequence & fire IRQ */
  233. radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
  234. radeon_ring_write(rdev, fence->seq);
  235. radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
  236. radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
  237. }
  238. /*
  239. * Writeback
  240. */
  241. int r100_wb_init(struct radeon_device *rdev)
  242. {
  243. int r;
  244. if (rdev->wb.wb_obj == NULL) {
  245. r = radeon_object_create(rdev, NULL, 4096,
  246. true,
  247. RADEON_GEM_DOMAIN_GTT,
  248. false, &rdev->wb.wb_obj);
  249. if (r) {
  250. DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
  251. return r;
  252. }
  253. r = radeon_object_pin(rdev->wb.wb_obj,
  254. RADEON_GEM_DOMAIN_GTT,
  255. &rdev->wb.gpu_addr);
  256. if (r) {
  257. DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
  258. return r;
  259. }
  260. r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
  261. if (r) {
  262. DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
  263. return r;
  264. }
  265. }
  266. WREG32(0x774, rdev->wb.gpu_addr);
  267. WREG32(0x70C, rdev->wb.gpu_addr + 1024);
  268. WREG32(0x770, 0xff);
  269. return 0;
  270. }
  271. void r100_wb_fini(struct radeon_device *rdev)
  272. {
  273. if (rdev->wb.wb_obj) {
  274. radeon_object_kunmap(rdev->wb.wb_obj);
  275. radeon_object_unpin(rdev->wb.wb_obj);
  276. radeon_object_unref(&rdev->wb.wb_obj);
  277. rdev->wb.wb = NULL;
  278. rdev->wb.wb_obj = NULL;
  279. }
  280. }
  281. int r100_copy_blit(struct radeon_device *rdev,
  282. uint64_t src_offset,
  283. uint64_t dst_offset,
  284. unsigned num_pages,
  285. struct radeon_fence *fence)
  286. {
  287. uint32_t cur_pages;
  288. uint32_t stride_bytes = PAGE_SIZE;
  289. uint32_t pitch;
  290. uint32_t stride_pixels;
  291. unsigned ndw;
  292. int num_loops;
  293. int r = 0;
  294. /* radeon limited to 16k stride */
  295. stride_bytes &= 0x3fff;
  296. /* radeon pitch is /64 */
  297. pitch = stride_bytes / 64;
  298. stride_pixels = stride_bytes / 4;
  299. num_loops = DIV_ROUND_UP(num_pages, 8191);
  300. /* Ask for enough room for blit + flush + fence */
  301. ndw = 64 + (10 * num_loops);
  302. r = radeon_ring_lock(rdev, ndw);
  303. if (r) {
  304. DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
  305. return -EINVAL;
  306. }
  307. while (num_pages > 0) {
  308. cur_pages = num_pages;
  309. if (cur_pages > 8191) {
  310. cur_pages = 8191;
  311. }
  312. num_pages -= cur_pages;
  313. /* pages are in Y direction - height
  314. page width in X direction - width */
  315. radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8));
  316. radeon_ring_write(rdev,
  317. RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
  318. RADEON_GMC_DST_PITCH_OFFSET_CNTL |
  319. RADEON_GMC_SRC_CLIPPING |
  320. RADEON_GMC_DST_CLIPPING |
  321. RADEON_GMC_BRUSH_NONE |
  322. (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
  323. RADEON_GMC_SRC_DATATYPE_COLOR |
  324. RADEON_ROP3_S |
  325. RADEON_DP_SRC_SOURCE_MEMORY |
  326. RADEON_GMC_CLR_CMP_CNTL_DIS |
  327. RADEON_GMC_WR_MSK_DIS);
  328. radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10));
  329. radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10));
  330. radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
  331. radeon_ring_write(rdev, 0);
  332. radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
  333. radeon_ring_write(rdev, num_pages);
  334. radeon_ring_write(rdev, num_pages);
  335. radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
  336. }
  337. radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
  338. radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL);
  339. radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
  340. radeon_ring_write(rdev,
  341. RADEON_WAIT_2D_IDLECLEAN |
  342. RADEON_WAIT_HOST_IDLECLEAN |
  343. RADEON_WAIT_DMA_GUI_IDLE);
  344. if (fence) {
  345. r = radeon_fence_emit(rdev, fence);
  346. }
  347. radeon_ring_unlock_commit(rdev);
  348. return r;
  349. }
  350. /*
  351. * CP
  352. */
  353. void r100_ring_start(struct radeon_device *rdev)
  354. {
  355. int r;
  356. r = radeon_ring_lock(rdev, 2);
  357. if (r) {
  358. return;
  359. }
  360. radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
  361. radeon_ring_write(rdev,
  362. RADEON_ISYNC_ANY2D_IDLE3D |
  363. RADEON_ISYNC_ANY3D_IDLE2D |
  364. RADEON_ISYNC_WAIT_IDLEGUI |
  365. RADEON_ISYNC_CPSCRATCH_IDLEGUI);
  366. radeon_ring_unlock_commit(rdev);
  367. }
  368. static void r100_cp_load_microcode(struct radeon_device *rdev)
  369. {
  370. int i;
  371. if (r100_gui_wait_for_idle(rdev)) {
  372. printk(KERN_WARNING "Failed to wait GUI idle while "
  373. "programming pipes. Bad things might happen.\n");
  374. }
  375. WREG32(RADEON_CP_ME_RAM_ADDR, 0);
  376. if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
  377. (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
  378. (rdev->family == CHIP_RS200)) {
  379. DRM_INFO("Loading R100 Microcode\n");
  380. for (i = 0; i < 256; i++) {
  381. WREG32(RADEON_CP_ME_RAM_DATAH, R100_cp_microcode[i][1]);
  382. WREG32(RADEON_CP_ME_RAM_DATAL, R100_cp_microcode[i][0]);
  383. }
  384. } else if ((rdev->family == CHIP_R200) ||
  385. (rdev->family == CHIP_RV250) ||
  386. (rdev->family == CHIP_RV280) ||
  387. (rdev->family == CHIP_RS300)) {
  388. DRM_INFO("Loading R200 Microcode\n");
  389. for (i = 0; i < 256; i++) {
  390. WREG32(RADEON_CP_ME_RAM_DATAH, R200_cp_microcode[i][1]);
  391. WREG32(RADEON_CP_ME_RAM_DATAL, R200_cp_microcode[i][0]);
  392. }
  393. } else if ((rdev->family == CHIP_R300) ||
  394. (rdev->family == CHIP_R350) ||
  395. (rdev->family == CHIP_RV350) ||
  396. (rdev->family == CHIP_RV380) ||
  397. (rdev->family == CHIP_RS400) ||
  398. (rdev->family == CHIP_RS480)) {
  399. DRM_INFO("Loading R300 Microcode\n");
  400. for (i = 0; i < 256; i++) {
  401. WREG32(RADEON_CP_ME_RAM_DATAH, R300_cp_microcode[i][1]);
  402. WREG32(RADEON_CP_ME_RAM_DATAL, R300_cp_microcode[i][0]);
  403. }
  404. } else if ((rdev->family == CHIP_R420) ||
  405. (rdev->family == CHIP_R423) ||
  406. (rdev->family == CHIP_RV410)) {
  407. DRM_INFO("Loading R400 Microcode\n");
  408. for (i = 0; i < 256; i++) {
  409. WREG32(RADEON_CP_ME_RAM_DATAH, R420_cp_microcode[i][1]);
  410. WREG32(RADEON_CP_ME_RAM_DATAL, R420_cp_microcode[i][0]);
  411. }
  412. } else if ((rdev->family == CHIP_RS690) ||
  413. (rdev->family == CHIP_RS740)) {
  414. DRM_INFO("Loading RS690/RS740 Microcode\n");
  415. for (i = 0; i < 256; i++) {
  416. WREG32(RADEON_CP_ME_RAM_DATAH, RS690_cp_microcode[i][1]);
  417. WREG32(RADEON_CP_ME_RAM_DATAL, RS690_cp_microcode[i][0]);
  418. }
  419. } else if (rdev->family == CHIP_RS600) {
  420. DRM_INFO("Loading RS600 Microcode\n");
  421. for (i = 0; i < 256; i++) {
  422. WREG32(RADEON_CP_ME_RAM_DATAH, RS600_cp_microcode[i][1]);
  423. WREG32(RADEON_CP_ME_RAM_DATAL, RS600_cp_microcode[i][0]);
  424. }
  425. } else if ((rdev->family == CHIP_RV515) ||
  426. (rdev->family == CHIP_R520) ||
  427. (rdev->family == CHIP_RV530) ||
  428. (rdev->family == CHIP_R580) ||
  429. (rdev->family == CHIP_RV560) ||
  430. (rdev->family == CHIP_RV570)) {
  431. DRM_INFO("Loading R500 Microcode\n");
  432. for (i = 0; i < 256; i++) {
  433. WREG32(RADEON_CP_ME_RAM_DATAH, R520_cp_microcode[i][1]);
  434. WREG32(RADEON_CP_ME_RAM_DATAL, R520_cp_microcode[i][0]);
  435. }
  436. }
  437. }
  438. int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
  439. {
  440. unsigned rb_bufsz;
  441. unsigned rb_blksz;
  442. unsigned max_fetch;
  443. unsigned pre_write_timer;
  444. unsigned pre_write_limit;
  445. unsigned indirect2_start;
  446. unsigned indirect1_start;
  447. uint32_t tmp;
  448. int r;
  449. if (r100_debugfs_cp_init(rdev)) {
  450. DRM_ERROR("Failed to register debugfs file for CP !\n");
  451. }
  452. /* Reset CP */
  453. tmp = RREG32(RADEON_CP_CSQ_STAT);
  454. if ((tmp & (1 << 31))) {
  455. DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp);
  456. WREG32(RADEON_CP_CSQ_MODE, 0);
  457. WREG32(RADEON_CP_CSQ_CNTL, 0);
  458. WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
  459. tmp = RREG32(RADEON_RBBM_SOFT_RESET);
  460. mdelay(2);
  461. WREG32(RADEON_RBBM_SOFT_RESET, 0);
  462. tmp = RREG32(RADEON_RBBM_SOFT_RESET);
  463. mdelay(2);
  464. tmp = RREG32(RADEON_CP_CSQ_STAT);
  465. if ((tmp & (1 << 31))) {
  466. DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp);
  467. }
  468. } else {
  469. DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
  470. }
  471. /* Align ring size */
  472. rb_bufsz = drm_order(ring_size / 8);
  473. ring_size = (1 << (rb_bufsz + 1)) * 4;
  474. r100_cp_load_microcode(rdev);
  475. r = radeon_ring_init(rdev, ring_size);
  476. if (r) {
  477. return r;
  478. }
  479. /* Each time the cp read 1024 bytes (16 dword/quadword) update
  480. * the rptr copy in system ram */
  481. rb_blksz = 9;
  482. /* cp will read 128bytes at a time (4 dwords) */
  483. max_fetch = 1;
  484. rdev->cp.align_mask = 16 - 1;
  485. /* Write to CP_RB_WPTR will be delayed for pre_write_timer clocks */
  486. pre_write_timer = 64;
  487. /* Force CP_RB_WPTR write if written more than one time before the
  488. * delay expire
  489. */
  490. pre_write_limit = 0;
  491. /* Setup the cp cache like this (cache size is 96 dwords) :
  492. * RING 0 to 15
  493. * INDIRECT1 16 to 79
  494. * INDIRECT2 80 to 95
  495. * So ring cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
  496. * indirect1 cache size is 64dwords (> (2 * max_fetch = 2 * 4dwords))
  497. * indirect2 cache size is 16dwords (> (2 * max_fetch = 2 * 4dwords))
  498. * Idea being that most of the gpu cmd will be through indirect1 buffer
  499. * so it gets the bigger cache.
  500. */
  501. indirect2_start = 80;
  502. indirect1_start = 16;
  503. /* cp setup */
  504. WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
  505. WREG32(RADEON_CP_RB_CNTL,
  506. #ifdef __BIG_ENDIAN
  507. RADEON_BUF_SWAP_32BIT |
  508. #endif
  509. REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
  510. REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
  511. REG_SET(RADEON_MAX_FETCH, max_fetch) |
  512. RADEON_RB_NO_UPDATE);
  513. /* Set ring address */
  514. DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
  515. WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
  516. /* Force read & write ptr to 0 */
  517. tmp = RREG32(RADEON_CP_RB_CNTL);
  518. WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
  519. WREG32(RADEON_CP_RB_RPTR_WR, 0);
  520. WREG32(RADEON_CP_RB_WPTR, 0);
  521. WREG32(RADEON_CP_RB_CNTL, tmp);
  522. udelay(10);
  523. rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
  524. rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
  525. /* Set cp mode to bus mastering & enable cp*/
  526. WREG32(RADEON_CP_CSQ_MODE,
  527. REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
  528. REG_SET(RADEON_INDIRECT1_START, indirect1_start));
  529. WREG32(0x718, 0);
  530. WREG32(0x744, 0x00004D4D);
  531. WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
  532. radeon_ring_start(rdev);
  533. r = radeon_ring_test(rdev);
  534. if (r) {
  535. DRM_ERROR("radeon: cp isn't working (%d).\n", r);
  536. return r;
  537. }
  538. rdev->cp.ready = true;
  539. return 0;
  540. }
  541. void r100_cp_fini(struct radeon_device *rdev)
  542. {
  543. /* Disable ring */
  544. rdev->cp.ready = false;
  545. WREG32(RADEON_CP_CSQ_CNTL, 0);
  546. radeon_ring_fini(rdev);
  547. DRM_INFO("radeon: cp finalized\n");
  548. }
  549. void r100_cp_disable(struct radeon_device *rdev)
  550. {
  551. /* Disable ring */
  552. rdev->cp.ready = false;
  553. WREG32(RADEON_CP_CSQ_MODE, 0);
  554. WREG32(RADEON_CP_CSQ_CNTL, 0);
  555. if (r100_gui_wait_for_idle(rdev)) {
  556. printk(KERN_WARNING "Failed to wait GUI idle while "
  557. "programming pipes. Bad things might happen.\n");
  558. }
  559. }
  560. int r100_cp_reset(struct radeon_device *rdev)
  561. {
  562. uint32_t tmp;
  563. bool reinit_cp;
  564. int i;
  565. reinit_cp = rdev->cp.ready;
  566. rdev->cp.ready = false;
  567. WREG32(RADEON_CP_CSQ_MODE, 0);
  568. WREG32(RADEON_CP_CSQ_CNTL, 0);
  569. WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
  570. (void)RREG32(RADEON_RBBM_SOFT_RESET);
  571. udelay(200);
  572. WREG32(RADEON_RBBM_SOFT_RESET, 0);
  573. /* Wait to prevent race in RBBM_STATUS */
  574. mdelay(1);
  575. for (i = 0; i < rdev->usec_timeout; i++) {
  576. tmp = RREG32(RADEON_RBBM_STATUS);
  577. if (!(tmp & (1 << 16))) {
  578. DRM_INFO("CP reset succeed (RBBM_STATUS=0x%08X)\n",
  579. tmp);
  580. if (reinit_cp) {
  581. return r100_cp_init(rdev, rdev->cp.ring_size);
  582. }
  583. return 0;
  584. }
  585. DRM_UDELAY(1);
  586. }
  587. tmp = RREG32(RADEON_RBBM_STATUS);
  588. DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp);
  589. return -1;
  590. }
  591. /*
  592. * CS functions
  593. */
  594. int r100_cs_parse_packet0(struct radeon_cs_parser *p,
  595. struct radeon_cs_packet *pkt,
  596. const unsigned *auth, unsigned n,
  597. radeon_packet0_check_t check)
  598. {
  599. unsigned reg;
  600. unsigned i, j, m;
  601. unsigned idx;
  602. int r;
  603. idx = pkt->idx + 1;
  604. reg = pkt->reg;
  605. /* Check that register fall into register range
  606. * determined by the number of entry (n) in the
  607. * safe register bitmap.
  608. */
  609. if (pkt->one_reg_wr) {
  610. if ((reg >> 7) > n) {
  611. return -EINVAL;
  612. }
  613. } else {
  614. if (((reg + (pkt->count << 2)) >> 7) > n) {
  615. return -EINVAL;
  616. }
  617. }
  618. for (i = 0; i <= pkt->count; i++, idx++) {
  619. j = (reg >> 7);
  620. m = 1 << ((reg >> 2) & 31);
  621. if (auth[j] & m) {
  622. r = check(p, pkt, idx, reg);
  623. if (r) {
  624. return r;
  625. }
  626. }
  627. if (pkt->one_reg_wr) {
  628. if (!(auth[j] & m)) {
  629. break;
  630. }
  631. } else {
  632. reg += 4;
  633. }
  634. }
  635. return 0;
  636. }
  637. void r100_cs_dump_packet(struct radeon_cs_parser *p,
  638. struct radeon_cs_packet *pkt)
  639. {
  640. struct radeon_cs_chunk *ib_chunk;
  641. volatile uint32_t *ib;
  642. unsigned i;
  643. unsigned idx;
  644. ib = p->ib->ptr;
  645. ib_chunk = &p->chunks[p->chunk_ib_idx];
  646. idx = pkt->idx;
  647. for (i = 0; i <= (pkt->count + 1); i++, idx++) {
  648. DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
  649. }
  650. }
  651. /**
  652. * r100_cs_packet_parse() - parse cp packet and point ib index to next packet
  653. * @parser: parser structure holding parsing context.
  654. * @pkt: where to store packet informations
  655. *
  656. * Assume that chunk_ib_index is properly set. Will return -EINVAL
  657. * if packet is bigger than remaining ib size. or if packets is unknown.
  658. **/
  659. int r100_cs_packet_parse(struct radeon_cs_parser *p,
  660. struct radeon_cs_packet *pkt,
  661. unsigned idx)
  662. {
  663. struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
  664. uint32_t header = ib_chunk->kdata[idx];
  665. if (idx >= ib_chunk->length_dw) {
  666. DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
  667. idx, ib_chunk->length_dw);
  668. return -EINVAL;
  669. }
  670. pkt->idx = idx;
  671. pkt->type = CP_PACKET_GET_TYPE(header);
  672. pkt->count = CP_PACKET_GET_COUNT(header);
  673. switch (pkt->type) {
  674. case PACKET_TYPE0:
  675. pkt->reg = CP_PACKET0_GET_REG(header);
  676. pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
  677. break;
  678. case PACKET_TYPE3:
  679. pkt->opcode = CP_PACKET3_GET_OPCODE(header);
  680. break;
  681. case PACKET_TYPE2:
  682. pkt->count = -1;
  683. break;
  684. default:
  685. DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
  686. return -EINVAL;
  687. }
  688. if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
  689. DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
  690. pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
  691. return -EINVAL;
  692. }
  693. return 0;
  694. }
  695. /**
  696. * r100_cs_packet_next_reloc() - parse next packet which should be reloc packet3
  697. * @parser: parser structure holding parsing context.
  698. * @data: pointer to relocation data
  699. * @offset_start: starting offset
  700. * @offset_mask: offset mask (to align start offset on)
  701. * @reloc: reloc informations
  702. *
  703. * Check next packet is relocation packet3, do bo validation and compute
  704. * GPU offset using the provided start.
  705. **/
  706. int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
  707. struct radeon_cs_reloc **cs_reloc)
  708. {
  709. struct radeon_cs_chunk *ib_chunk;
  710. struct radeon_cs_chunk *relocs_chunk;
  711. struct radeon_cs_packet p3reloc;
  712. unsigned idx;
  713. int r;
  714. if (p->chunk_relocs_idx == -1) {
  715. DRM_ERROR("No relocation chunk !\n");
  716. return -EINVAL;
  717. }
  718. *cs_reloc = NULL;
  719. ib_chunk = &p->chunks[p->chunk_ib_idx];
  720. relocs_chunk = &p->chunks[p->chunk_relocs_idx];
  721. r = r100_cs_packet_parse(p, &p3reloc, p->idx);
  722. if (r) {
  723. return r;
  724. }
  725. p->idx += p3reloc.count + 2;
  726. if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
  727. DRM_ERROR("No packet3 for relocation for packet at %d.\n",
  728. p3reloc.idx);
  729. r100_cs_dump_packet(p, &p3reloc);
  730. return -EINVAL;
  731. }
  732. idx = ib_chunk->kdata[p3reloc.idx + 1];
  733. if (idx >= relocs_chunk->length_dw) {
  734. DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
  735. idx, relocs_chunk->length_dw);
  736. r100_cs_dump_packet(p, &p3reloc);
  737. return -EINVAL;
  738. }
  739. /* FIXME: we assume reloc size is 4 dwords */
  740. *cs_reloc = p->relocs_ptr[(idx / 4)];
  741. return 0;
  742. }
  743. static int r100_packet0_check(struct radeon_cs_parser *p,
  744. struct radeon_cs_packet *pkt)
  745. {
  746. struct radeon_cs_chunk *ib_chunk;
  747. struct radeon_cs_reloc *reloc;
  748. volatile uint32_t *ib;
  749. uint32_t tmp;
  750. unsigned reg;
  751. unsigned i;
  752. unsigned idx;
  753. bool onereg;
  754. int r;
  755. ib = p->ib->ptr;
  756. ib_chunk = &p->chunks[p->chunk_ib_idx];
  757. idx = pkt->idx + 1;
  758. reg = pkt->reg;
  759. onereg = false;
  760. if (CP_PACKET0_GET_ONE_REG_WR(ib_chunk->kdata[pkt->idx])) {
  761. onereg = true;
  762. }
  763. for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
  764. switch (reg) {
  765. /* FIXME: only allow PACKET3 blit? easier to check for out of
  766. * range access */
  767. case RADEON_DST_PITCH_OFFSET:
  768. case RADEON_SRC_PITCH_OFFSET:
  769. r = r100_cs_packet_next_reloc(p, &reloc);
  770. if (r) {
  771. DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
  772. idx, reg);
  773. r100_cs_dump_packet(p, pkt);
  774. return r;
  775. }
  776. tmp = ib_chunk->kdata[idx] & 0x003fffff;
  777. tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
  778. ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp;
  779. break;
  780. case RADEON_RB3D_DEPTHOFFSET:
  781. case RADEON_RB3D_COLOROFFSET:
  782. case R300_RB3D_COLOROFFSET0:
  783. case R300_ZB_DEPTHOFFSET:
  784. case R200_PP_TXOFFSET_0:
  785. case R200_PP_TXOFFSET_1:
  786. case R200_PP_TXOFFSET_2:
  787. case R200_PP_TXOFFSET_3:
  788. case R200_PP_TXOFFSET_4:
  789. case R200_PP_TXOFFSET_5:
  790. case RADEON_PP_TXOFFSET_0:
  791. case RADEON_PP_TXOFFSET_1:
  792. case RADEON_PP_TXOFFSET_2:
  793. case R300_TX_OFFSET_0:
  794. case R300_TX_OFFSET_0+4:
  795. case R300_TX_OFFSET_0+8:
  796. case R300_TX_OFFSET_0+12:
  797. case R300_TX_OFFSET_0+16:
  798. case R300_TX_OFFSET_0+20:
  799. case R300_TX_OFFSET_0+24:
  800. case R300_TX_OFFSET_0+28:
  801. case R300_TX_OFFSET_0+32:
  802. case R300_TX_OFFSET_0+36:
  803. case R300_TX_OFFSET_0+40:
  804. case R300_TX_OFFSET_0+44:
  805. case R300_TX_OFFSET_0+48:
  806. case R300_TX_OFFSET_0+52:
  807. case R300_TX_OFFSET_0+56:
  808. case R300_TX_OFFSET_0+60:
  809. r = r100_cs_packet_next_reloc(p, &reloc);
  810. if (r) {
  811. DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
  812. idx, reg);
  813. r100_cs_dump_packet(p, pkt);
  814. return r;
  815. }
  816. ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
  817. break;
  818. default:
  819. /* FIXME: we don't want to allow anyothers packet */
  820. break;
  821. }
  822. if (onereg) {
  823. /* FIXME: forbid onereg write to register on relocate */
  824. break;
  825. }
  826. }
  827. return 0;
  828. }
  829. int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
  830. struct radeon_cs_packet *pkt,
  831. struct radeon_object *robj)
  832. {
  833. struct radeon_cs_chunk *ib_chunk;
  834. unsigned idx;
  835. ib_chunk = &p->chunks[p->chunk_ib_idx];
  836. idx = pkt->idx + 1;
  837. if ((ib_chunk->kdata[idx+2] + 1) > radeon_object_size(robj)) {
  838. DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
  839. "(need %u have %lu) !\n",
  840. ib_chunk->kdata[idx+2] + 1,
  841. radeon_object_size(robj));
  842. return -EINVAL;
  843. }
  844. return 0;
  845. }
  846. static int r100_packet3_check(struct radeon_cs_parser *p,
  847. struct radeon_cs_packet *pkt)
  848. {
  849. struct radeon_cs_chunk *ib_chunk;
  850. struct radeon_cs_reloc *reloc;
  851. unsigned idx;
  852. unsigned i, c;
  853. volatile uint32_t *ib;
  854. int r;
  855. ib = p->ib->ptr;
  856. ib_chunk = &p->chunks[p->chunk_ib_idx];
  857. idx = pkt->idx + 1;
  858. switch (pkt->opcode) {
  859. case PACKET3_3D_LOAD_VBPNTR:
  860. c = ib_chunk->kdata[idx++];
  861. for (i = 0; i < (c - 1); i += 2, idx += 3) {
  862. r = r100_cs_packet_next_reloc(p, &reloc);
  863. if (r) {
  864. DRM_ERROR("No reloc for packet3 %d\n",
  865. pkt->opcode);
  866. r100_cs_dump_packet(p, pkt);
  867. return r;
  868. }
  869. ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
  870. r = r100_cs_packet_next_reloc(p, &reloc);
  871. if (r) {
  872. DRM_ERROR("No reloc for packet3 %d\n",
  873. pkt->opcode);
  874. r100_cs_dump_packet(p, pkt);
  875. return r;
  876. }
  877. ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
  878. }
  879. if (c & 1) {
  880. r = r100_cs_packet_next_reloc(p, &reloc);
  881. if (r) {
  882. DRM_ERROR("No reloc for packet3 %d\n",
  883. pkt->opcode);
  884. r100_cs_dump_packet(p, pkt);
  885. return r;
  886. }
  887. ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
  888. }
  889. break;
  890. case PACKET3_INDX_BUFFER:
  891. r = r100_cs_packet_next_reloc(p, &reloc);
  892. if (r) {
  893. DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
  894. r100_cs_dump_packet(p, pkt);
  895. return r;
  896. }
  897. ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
  898. r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
  899. if (r) {
  900. return r;
  901. }
  902. break;
  903. case 0x23:
  904. /* FIXME: cleanup */
  905. /* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
  906. r = r100_cs_packet_next_reloc(p, &reloc);
  907. if (r) {
  908. DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
  909. r100_cs_dump_packet(p, pkt);
  910. return r;
  911. }
  912. ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
  913. break;
  914. case PACKET3_3D_DRAW_IMMD:
  915. /* triggers drawing using in-packet vertex data */
  916. case PACKET3_3D_DRAW_IMMD_2:
  917. /* triggers drawing using in-packet vertex data */
  918. case PACKET3_3D_DRAW_VBUF_2:
  919. /* triggers drawing of vertex buffers setup elsewhere */
  920. case PACKET3_3D_DRAW_INDX_2:
  921. /* triggers drawing using indices to vertex buffer */
  922. case PACKET3_3D_DRAW_VBUF:
  923. /* triggers drawing of vertex buffers setup elsewhere */
  924. case PACKET3_3D_DRAW_INDX:
  925. /* triggers drawing using indices to vertex buffer */
  926. case PACKET3_NOP:
  927. break;
  928. default:
  929. DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
  930. return -EINVAL;
  931. }
  932. return 0;
  933. }
  934. int r100_cs_parse(struct radeon_cs_parser *p)
  935. {
  936. struct radeon_cs_packet pkt;
  937. int r;
  938. do {
  939. r = r100_cs_packet_parse(p, &pkt, p->idx);
  940. if (r) {
  941. return r;
  942. }
  943. p->idx += pkt.count + 2;
  944. switch (pkt.type) {
  945. case PACKET_TYPE0:
  946. r = r100_packet0_check(p, &pkt);
  947. break;
  948. case PACKET_TYPE2:
  949. break;
  950. case PACKET_TYPE3:
  951. r = r100_packet3_check(p, &pkt);
  952. break;
  953. default:
  954. DRM_ERROR("Unknown packet type %d !\n",
  955. pkt.type);
  956. return -EINVAL;
  957. }
  958. if (r) {
  959. return r;
  960. }
  961. } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
  962. return 0;
  963. }
  964. /*
  965. * Global GPU functions
  966. */
  967. void r100_errata(struct radeon_device *rdev)
  968. {
  969. rdev->pll_errata = 0;
  970. if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
  971. rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
  972. }
  973. if (rdev->family == CHIP_RV100 ||
  974. rdev->family == CHIP_RS100 ||
  975. rdev->family == CHIP_RS200) {
  976. rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
  977. }
  978. }
  979. /* Wait for vertical sync on primary CRTC */
  980. void r100_gpu_wait_for_vsync(struct radeon_device *rdev)
  981. {
  982. uint32_t crtc_gen_cntl, tmp;
  983. int i;
  984. crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
  985. if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) ||
  986. !(crtc_gen_cntl & RADEON_CRTC_EN)) {
  987. return;
  988. }
  989. /* Clear the CRTC_VBLANK_SAVE bit */
  990. WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR);
  991. for (i = 0; i < rdev->usec_timeout; i++) {
  992. tmp = RREG32(RADEON_CRTC_STATUS);
  993. if (tmp & RADEON_CRTC_VBLANK_SAVE) {
  994. return;
  995. }
  996. DRM_UDELAY(1);
  997. }
  998. }
  999. /* Wait for vertical sync on secondary CRTC */
  1000. void r100_gpu_wait_for_vsync2(struct radeon_device *rdev)
  1001. {
  1002. uint32_t crtc2_gen_cntl, tmp;
  1003. int i;
  1004. crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
  1005. if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) ||
  1006. !(crtc2_gen_cntl & RADEON_CRTC2_EN))
  1007. return;
  1008. /* Clear the CRTC_VBLANK_SAVE bit */
  1009. WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR);
  1010. for (i = 0; i < rdev->usec_timeout; i++) {
  1011. tmp = RREG32(RADEON_CRTC2_STATUS);
  1012. if (tmp & RADEON_CRTC2_VBLANK_SAVE) {
  1013. return;
  1014. }
  1015. DRM_UDELAY(1);
  1016. }
  1017. }
  1018. int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
  1019. {
  1020. unsigned i;
  1021. uint32_t tmp;
  1022. for (i = 0; i < rdev->usec_timeout; i++) {
  1023. tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
  1024. if (tmp >= n) {
  1025. return 0;
  1026. }
  1027. DRM_UDELAY(1);
  1028. }
  1029. return -1;
  1030. }
  1031. int r100_gui_wait_for_idle(struct radeon_device *rdev)
  1032. {
  1033. unsigned i;
  1034. uint32_t tmp;
  1035. if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
  1036. printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
  1037. " Bad things might happen.\n");
  1038. }
  1039. for (i = 0; i < rdev->usec_timeout; i++) {
  1040. tmp = RREG32(RADEON_RBBM_STATUS);
  1041. if (!(tmp & (1 << 31))) {
  1042. return 0;
  1043. }
  1044. DRM_UDELAY(1);
  1045. }
  1046. return -1;
  1047. }
  1048. int r100_mc_wait_for_idle(struct radeon_device *rdev)
  1049. {
  1050. unsigned i;
  1051. uint32_t tmp;
  1052. for (i = 0; i < rdev->usec_timeout; i++) {
  1053. /* read MC_STATUS */
  1054. tmp = RREG32(0x0150);
  1055. if (tmp & (1 << 2)) {
  1056. return 0;
  1057. }
  1058. DRM_UDELAY(1);
  1059. }
  1060. return -1;
  1061. }
  /*
   * GPU initialisation hook for the r100 family; currently it only resets
   * the HDP block.
   */
  void r100_gpu_init(struct radeon_device *rdev)
  {
      /* TODO: anything else to do here? pipes? */
      r100_hdp_reset(rdev);
  }
  1067. void r100_hdp_reset(struct radeon_device *rdev)
  1068. {
  1069. uint32_t tmp;
  1070. tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
  1071. tmp |= (7 << 28);
  1072. WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
  1073. (void)RREG32(RADEON_HOST_PATH_CNTL);
  1074. udelay(200);
  1075. WREG32(RADEON_RBBM_SOFT_RESET, 0);
  1076. WREG32(RADEON_HOST_PATH_CNTL, tmp);
  1077. (void)RREG32(RADEON_HOST_PATH_CNTL);
  1078. }
  1079. int r100_rb2d_reset(struct radeon_device *rdev)
  1080. {
  1081. uint32_t tmp;
  1082. int i;
  1083. WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2);
  1084. (void)RREG32(RADEON_RBBM_SOFT_RESET);
  1085. udelay(200);
  1086. WREG32(RADEON_RBBM_SOFT_RESET, 0);
  1087. /* Wait to prevent race in RBBM_STATUS */
  1088. mdelay(1);
  1089. for (i = 0; i < rdev->usec_timeout; i++) {
  1090. tmp = RREG32(RADEON_RBBM_STATUS);
  1091. if (!(tmp & (1 << 26))) {
  1092. DRM_INFO("RB2D reset succeed (RBBM_STATUS=0x%08X)\n",
  1093. tmp);
  1094. return 0;
  1095. }
  1096. DRM_UDELAY(1);
  1097. }
  1098. tmp = RREG32(RADEON_RBBM_STATUS);
  1099. DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp);
  1100. return -1;
  1101. }
  1102. int r100_gpu_reset(struct radeon_device *rdev)
  1103. {
  1104. uint32_t status;
  1105. /* reset order likely matter */
  1106. status = RREG32(RADEON_RBBM_STATUS);
  1107. /* reset HDP */
  1108. r100_hdp_reset(rdev);
  1109. /* reset rb2d */
  1110. if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
  1111. r100_rb2d_reset(rdev);
  1112. }
  1113. /* TODO: reset 3D engine */
  1114. /* reset CP */
  1115. status = RREG32(RADEON_RBBM_STATUS);
  1116. if (status & (1 << 16)) {
  1117. r100_cp_reset(rdev);
  1118. }
  1119. /* Check if GPU is idle */
  1120. status = RREG32(RADEON_RBBM_STATUS);
  1121. if (status & (1 << 31)) {
  1122. DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
  1123. return -1;
  1124. }
  1125. DRM_INFO("GPU reset succeed (RBBM_STATUS=0x%08X)\n", status);
  1126. return 0;
  1127. }
  1128. /*
  1129. * VRAM info
  1130. */
  1131. static void r100_vram_get_type(struct radeon_device *rdev)
  1132. {
  1133. uint32_t tmp;
  1134. rdev->mc.vram_is_ddr = false;
  1135. if (rdev->flags & RADEON_IS_IGP)
  1136. rdev->mc.vram_is_ddr = true;
  1137. else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
  1138. rdev->mc.vram_is_ddr = true;
  1139. if ((rdev->family == CHIP_RV100) ||
  1140. (rdev->family == CHIP_RS100) ||
  1141. (rdev->family == CHIP_RS200)) {
  1142. tmp = RREG32(RADEON_MEM_CNTL);
  1143. if (tmp & RV100_HALF_MODE) {
  1144. rdev->mc.vram_width = 32;
  1145. } else {
  1146. rdev->mc.vram_width = 64;
  1147. }
  1148. if (rdev->flags & RADEON_SINGLE_CRTC) {
  1149. rdev->mc.vram_width /= 4;
  1150. rdev->mc.vram_is_ddr = true;
  1151. }
  1152. } else if (rdev->family <= CHIP_RV280) {
  1153. tmp = RREG32(RADEON_MEM_CNTL);
  1154. if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
  1155. rdev->mc.vram_width = 128;
  1156. } else {
  1157. rdev->mc.vram_width = 64;
  1158. }
  1159. } else {
  1160. /* newer IGPs */
  1161. rdev->mc.vram_width = 128;
  1162. }
  1163. }
  1164. void r100_vram_info(struct radeon_device *rdev)
  1165. {
  1166. r100_vram_get_type(rdev);
  1167. if (rdev->flags & RADEON_IS_IGP) {
  1168. uint32_t tom;
  1169. /* read NB_TOM to get the amount of ram stolen for the GPU */
  1170. tom = RREG32(RADEON_NB_TOM);
  1171. rdev->mc.vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
  1172. WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
  1173. } else {
  1174. rdev->mc.vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
  1175. /* Some production boards of m6 will report 0
  1176. * if it's 8 MB
  1177. */
  1178. if (rdev->mc.vram_size == 0) {
  1179. rdev->mc.vram_size = 8192 * 1024;
  1180. WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.vram_size);
  1181. }
  1182. }
  1183. rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
  1184. rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
  1185. }
  1186. /*
  1187. * Indirect registers accessor
  1188. */
  1189. void r100_pll_errata_after_index(struct radeon_device *rdev)
  1190. {
  1191. if (!(rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS)) {
  1192. return;
  1193. }
  1194. (void)RREG32(RADEON_CLOCK_CNTL_DATA);
  1195. (void)RREG32(RADEON_CRTC_GEN_CNTL);
  1196. }
  1197. static void r100_pll_errata_after_data(struct radeon_device *rdev)
  1198. {
  1199. /* This workarounds is necessary on RV100, RS100 and RS200 chips
  1200. * or the chip could hang on a subsequent access
  1201. */
  1202. if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
  1203. udelay(5000);
  1204. }
  1205. /* This function is required to workaround a hardware bug in some (all?)
  1206. * revisions of the R300. This workaround should be called after every
  1207. * CLOCK_CNTL_INDEX register access. If not, register reads afterward
  1208. * may not be correct.
  1209. */
  1210. if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
  1211. uint32_t save, tmp;
  1212. save = RREG32(RADEON_CLOCK_CNTL_INDEX);
  1213. tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
  1214. WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
  1215. tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
  1216. WREG32(RADEON_CLOCK_CNTL_INDEX, save);
  1217. }
  1218. }
  1219. uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
  1220. {
  1221. uint32_t data;
  1222. WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
  1223. r100_pll_errata_after_index(rdev);
  1224. data = RREG32(RADEON_CLOCK_CNTL_DATA);
  1225. r100_pll_errata_after_data(rdev);
  1226. return data;
  1227. }
  1228. void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
  1229. {
  1230. WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
  1231. r100_pll_errata_after_index(rdev);
  1232. WREG32(RADEON_CLOCK_CNTL_DATA, v);
  1233. r100_pll_errata_after_data(rdev);
  1234. }
  1235. uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
  1236. {
  1237. if (reg < 0x10000)
  1238. return readl(((void __iomem *)rdev->rmmio) + reg);
  1239. else {
  1240. writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
  1241. return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
  1242. }
  1243. }
  1244. void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
  1245. {
  1246. if (reg < 0x10000)
  1247. writel(v, ((void __iomem *)rdev->rmmio) + reg);
  1248. else {
  1249. writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
  1250. writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
  1251. }
  1252. }
  /*
   * ASIC init hook for the r100 family. There is nothing to do here, so it
   * always reports success (0). @rdev is unused.
   */
  int r100_init(struct radeon_device *rdev)
  {
      (void)rdev;
      return 0;
  }
  1257. /*
  1258. * Debugfs info
  1259. */
  1260. #if defined(CONFIG_DEBUG_FS)
  1261. static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
  1262. {
  1263. struct drm_info_node *node = (struct drm_info_node *) m->private;
  1264. struct drm_device *dev = node->minor->dev;
  1265. struct radeon_device *rdev = dev->dev_private;
  1266. uint32_t reg, value;
  1267. unsigned i;
  1268. seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
  1269. seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
  1270. seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
  1271. for (i = 0; i < 64; i++) {
  1272. WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
  1273. reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
  1274. WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
  1275. value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
  1276. seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
  1277. }
  1278. return 0;
  1279. }
  1280. static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
  1281. {
  1282. struct drm_info_node *node = (struct drm_info_node *) m->private;
  1283. struct drm_device *dev = node->minor->dev;
  1284. struct radeon_device *rdev = dev->dev_private;
  1285. uint32_t rdp, wdp;
  1286. unsigned count, i, j;
  1287. radeon_ring_free_size(rdev);
  1288. rdp = RREG32(RADEON_CP_RB_RPTR);
  1289. wdp = RREG32(RADEON_CP_RB_WPTR);
  1290. count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
  1291. seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
  1292. seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
  1293. seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
  1294. seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
  1295. seq_printf(m, "%u dwords in ring\n", count);
  1296. for (j = 0; j <= count; j++) {
  1297. i = (rdp + j) & rdev->cp.ptr_mask;
  1298. seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
  1299. }
  1300. return 0;
  1301. }
  1302. static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
  1303. {
  1304. struct drm_info_node *node = (struct drm_info_node *) m->private;
  1305. struct drm_device *dev = node->minor->dev;
  1306. struct radeon_device *rdev = dev->dev_private;
  1307. uint32_t csq_stat, csq2_stat, tmp;
  1308. unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
  1309. unsigned i;
  1310. seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
  1311. seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
  1312. csq_stat = RREG32(RADEON_CP_CSQ_STAT);
  1313. csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
  1314. r_rptr = (csq_stat >> 0) & 0x3ff;
  1315. r_wptr = (csq_stat >> 10) & 0x3ff;
  1316. ib1_rptr = (csq_stat >> 20) & 0x3ff;
  1317. ib1_wptr = (csq2_stat >> 0) & 0x3ff;
  1318. ib2_rptr = (csq2_stat >> 10) & 0x3ff;
  1319. ib2_wptr = (csq2_stat >> 20) & 0x3ff;
  1320. seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
  1321. seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
  1322. seq_printf(m, "Ring rptr %u\n", r_rptr);
  1323. seq_printf(m, "Ring wptr %u\n", r_wptr);
  1324. seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
  1325. seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
  1326. seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
  1327. seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
  1328. /* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
  1329. * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
  1330. seq_printf(m, "Ring fifo:\n");
  1331. for (i = 0; i < 256; i++) {
  1332. WREG32(RADEON_CP_CSQ_ADDR, i << 2);
  1333. tmp = RREG32(RADEON_CP_CSQ_DATA);
  1334. seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
  1335. }
  1336. seq_printf(m, "Indirect1 fifo:\n");
  1337. for (i = 256; i <= 512; i++) {
  1338. WREG32(RADEON_CP_CSQ_ADDR, i << 2);
  1339. tmp = RREG32(RADEON_CP_CSQ_DATA);
  1340. seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
  1341. }
  1342. seq_printf(m, "Indirect2 fifo:\n");
  1343. for (i = 640; i < ib1_wptr; i++) {
  1344. WREG32(RADEON_CP_CSQ_ADDR, i << 2);
  1345. tmp = RREG32(RADEON_CP_CSQ_DATA);
  1346. seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
  1347. }
  1348. return 0;
  1349. }
  1350. static int r100_debugfs_mc_info(struct seq_file *m, void *data)
  1351. {
  1352. struct drm_info_node *node = (struct drm_info_node *) m->private;
  1353. struct drm_device *dev = node->minor->dev;
  1354. struct radeon_device *rdev = dev->dev_private;
  1355. uint32_t tmp;
  1356. tmp = RREG32(RADEON_CONFIG_MEMSIZE);
  1357. seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
  1358. tmp = RREG32(RADEON_MC_FB_LOCATION);
  1359. seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
  1360. tmp = RREG32(RADEON_BUS_CNTL);
  1361. seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
  1362. tmp = RREG32(RADEON_MC_AGP_LOCATION);
  1363. seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
  1364. tmp = RREG32(RADEON_AGP_BASE);
  1365. seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
  1366. tmp = RREG32(RADEON_HOST_PATH_CNTL);
  1367. seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
  1368. tmp = RREG32(0x01D0);
  1369. seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
  1370. tmp = RREG32(RADEON_AIC_LO_ADDR);
  1371. seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
  1372. tmp = RREG32(RADEON_AIC_HI_ADDR);
  1373. seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
  1374. tmp = RREG32(0x01E4);
  1375. seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
  1376. return 0;
  1377. }
  1378. static struct drm_info_list r100_debugfs_rbbm_list[] = {
  1379. {"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
  1380. };
  1381. static struct drm_info_list r100_debugfs_cp_list[] = {
  1382. {"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
  1383. {"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
  1384. };
  1385. static struct drm_info_list r100_debugfs_mc_info_list[] = {
  1386. {"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
  1387. };
  1388. #endif
  /*
   * Register the RBBM debugfs file.
   *
   * Returns the result of radeon_debugfs_add_files(), or 0 when the kernel
   * is built without CONFIG_DEBUG_FS.
   */
  int r100_debugfs_rbbm_init(struct radeon_device *rdev)
  {
  #if defined(CONFIG_DEBUG_FS)
      /* entry count derived from the table itself */
      return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list,
                                      sizeof(r100_debugfs_rbbm_list) /
                                      sizeof(r100_debugfs_rbbm_list[0]));
  #else
      return 0;
  #endif
  }
  /*
   * Register the CP debugfs files (ring info and CSQ FIFO).
   *
   * Returns the result of radeon_debugfs_add_files(), or 0 when the kernel
   * is built without CONFIG_DEBUG_FS.
   */
  int r100_debugfs_cp_init(struct radeon_device *rdev)
  {
  #if defined(CONFIG_DEBUG_FS)
      /* entry count derived from the table itself */
      return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list,
                                      sizeof(r100_debugfs_cp_list) /
                                      sizeof(r100_debugfs_cp_list[0]));
  #else
      return 0;
  #endif
  }
  /*
   * Register the memory controller debugfs file.
   *
   * Returns the result of radeon_debugfs_add_files(), or 0 when the kernel
   * is built without CONFIG_DEBUG_FS.
   */
  int r100_debugfs_mc_info_init(struct radeon_device *rdev)
  {
  #if defined(CONFIG_DEBUG_FS)
      /* entry count derived from the table itself */
      return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list,
                                      sizeof(r100_debugfs_mc_info_list) /
                                      sizeof(r100_debugfs_mc_info_list[0]));
  #else
      return 0;
  #endif
  }