ni.c

/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_drm.h"
#include "nid.h"
#include "atom.h"
#include "ni_reg.h"
#include "cayman_blit_shaders.h"
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern int evergreen_mc_wait_for_idle(struct radeon_device *rdev);

#define EVERGREEN_PFP_UCODE_SIZE 1120
#define EVERGREEN_PM4_UCODE_SIZE 1376
#define EVERGREEN_RLC_UCODE_SIZE 768
#define BTC_MC_UCODE_SIZE 6024

#define CAYMAN_PFP_UCODE_SIZE 2176
#define CAYMAN_PM4_UCODE_SIZE 2176
#define CAYMAN_RLC_UCODE_SIZE 1024
#define CAYMAN_MC_UCODE_SIZE 6037
/* Firmware Names */
MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
MODULE_FIRMWARE("radeon/BARTS_me.bin");
MODULE_FIRMWARE("radeon/BARTS_mc.bin");
MODULE_FIRMWARE("radeon/BTC_rlc.bin");
MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
MODULE_FIRMWARE("radeon/TURKS_me.bin");
MODULE_FIRMWARE("radeon/TURKS_mc.bin");
MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
MODULE_FIRMWARE("radeon/CAICOS_me.bin");
MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
#define BTC_IO_MC_REGS_SIZE 29

static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00946a00}
};

static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00936a00}
};

static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00916a00}
};

static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00976b00}
};
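
/*
 * btc_mc_load_microcode - halt the MC sequencer, program the per-chip
 * MC IO debug register table, stream the MC ucode words into the
 * sequencer program RAM, then restart the engine and wait for memory
 * training to complete.
 */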
int btc_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 mem_type, running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BARTS:
		io_mc_regs = (u32 *)&barts_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_TURKS:
		io_mc_regs = (u32 *)&turks_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_CAICOS:
	default:
		io_mc_regs = (u32 *)&caicos_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_CAYMAN:
		io_mc_regs = (u32 *)&cayman_io_mc_regs;
		ucode_size = CAYMAN_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	}

	mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		while (!(RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD))
			udelay(10);

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
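
/*
 * ni_init_microcode - fetch the PFP, ME, RLC and MC firmware images for
 * the detected chip via request_firmware() and validate each image
 * against its expected ucode size; on failure, all firmware references
 * are released.
 */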
int ni_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	const char *rlc_chip_name;
	size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_BARTS:
		chip_name = "BARTS";
		rlc_chip_name = "BTC";
		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
		mc_req_size = BTC_MC_UCODE_SIZE * 4;
		break;
	case CHIP_TURKS:
		chip_name = "TURKS";
		rlc_chip_name = "BTC";
		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
		mc_req_size = BTC_MC_UCODE_SIZE * 4;
		break;
	case CHIP_CAICOS:
		chip_name = "CAICOS";
		rlc_chip_name = "BTC";
		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
		mc_req_size = BTC_MC_UCODE_SIZE * 4;
		break;
	case CHIP_CAYMAN:
		chip_name = "CAYMAN";
		rlc_chip_name = "CAYMAN";
		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
		rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
		mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
		break;
	default:
		BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
	err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mc_fw->size != mc_req_size) {
		printk(KERN_ERR
		       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mc_fw->size, fw_name);
		err = -EINVAL;
	}
out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "ni_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
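/*
 * cayman_get_tile_pipe_to_backend_map - build the 4-bit-per-pipe map of
 * tile pipes to enabled render backends.  The requested pipe, backend
 * and shader-engine counts are clamped to the hardware limits, a mask
 * of enabled backends is derived, and the pipe order is optionally
 * swizzled unless the enabled backends are adjacent to each other.
 */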
static u32 cayman_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
					       u32 num_tile_pipes,
					       u32 num_backends_per_asic,
					       u32 *backend_disable_mask_per_asic,
					       u32 num_shader_engines)
{
	u32 backend_map = 0;
	u32 enabled_backends_mask = 0;
	u32 enabled_backends_count = 0;
	u32 num_backends_per_se;
	u32 cur_pipe;
	u32 swizzle_pipe[CAYMAN_MAX_PIPES];
	u32 cur_backend = 0;
	u32 i;
	bool force_no_swizzle;

	/* force legal values */
	if (num_tile_pipes < 1)
		num_tile_pipes = 1;
	if (num_tile_pipes > rdev->config.cayman.max_tile_pipes)
		num_tile_pipes = rdev->config.cayman.max_tile_pipes;
	if (num_shader_engines < 1)
		num_shader_engines = 1;
	if (num_shader_engines > rdev->config.cayman.max_shader_engines)
		num_shader_engines = rdev->config.cayman.max_shader_engines;
	if (num_backends_per_asic > num_shader_engines)
		num_backends_per_asic = num_shader_engines;
	if (num_backends_per_asic > (rdev->config.cayman.max_backends_per_se * num_shader_engines))
		num_backends_per_asic = rdev->config.cayman.max_backends_per_se * num_shader_engines;

	/* make sure we have the same number of backends per se */
	num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines);
	/* set up the number of backends per se */
	num_backends_per_se = num_backends_per_asic / num_shader_engines;
	if (num_backends_per_se > rdev->config.cayman.max_backends_per_se) {
		num_backends_per_se = rdev->config.cayman.max_backends_per_se;
		num_backends_per_asic = num_backends_per_se * num_shader_engines;
	}

	/* create enable mask and count for enabled backends */
	for (i = 0; i < CAYMAN_MAX_BACKENDS; ++i) {
		if (((*backend_disable_mask_per_asic >> i) & 1) == 0) {
			enabled_backends_mask |= (1 << i);
			++enabled_backends_count;
		}
		if (enabled_backends_count == num_backends_per_asic)
			break;
	}

	/* force the backends mask to match the current number of backends */
	if (enabled_backends_count != num_backends_per_asic) {
		u32 this_backend_enabled;
		u32 shader_engine;
		u32 backend_per_se;

		enabled_backends_mask = 0;
		enabled_backends_count = 0;
		*backend_disable_mask_per_asic = CAYMAN_MAX_BACKENDS_MASK;
		for (i = 0; i < CAYMAN_MAX_BACKENDS; ++i) {
			/* calc the current se */
			shader_engine = i / rdev->config.cayman.max_backends_per_se;
			/* calc the backend per se */
			backend_per_se = i % rdev->config.cayman.max_backends_per_se;
			/* default to not enabled */
			this_backend_enabled = 0;
			if ((shader_engine < num_shader_engines) &&
			    (backend_per_se < num_backends_per_se))
				this_backend_enabled = 1;
			if (this_backend_enabled) {
				enabled_backends_mask |= (1 << i);
				*backend_disable_mask_per_asic &= ~(1 << i);
				++enabled_backends_count;
			}
		}
	}

	memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * CAYMAN_MAX_PIPES);
	switch (rdev->family) {
	case CHIP_CAYMAN:
		force_no_swizzle = true;
		break;
	default:
		force_no_swizzle = false;
		break;
	}
	if (force_no_swizzle) {
		bool last_backend_enabled = false;

		force_no_swizzle = false;
		for (i = 0; i < CAYMAN_MAX_BACKENDS; ++i) {
			if (((enabled_backends_mask >> i) & 1) == 1) {
				if (last_backend_enabled)
					force_no_swizzle = true;
				last_backend_enabled = true;
			} else
				last_backend_enabled = false;
		}
	}

	switch (num_tile_pipes) {
	case 1:
	case 3:
	case 5:
	case 7:
		DRM_ERROR("odd number of pipes!\n");
		break;
	case 2:
		swizzle_pipe[0] = 0;
		swizzle_pipe[1] = 1;
		break;
	case 4:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 1;
			swizzle_pipe[3] = 3;
		}
		break;
	case 6:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
			swizzle_pipe[4] = 4;
			swizzle_pipe[5] = 5;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 4;
			swizzle_pipe[3] = 1;
			swizzle_pipe[4] = 3;
			swizzle_pipe[5] = 5;
		}
		break;
	case 8:
		if (force_no_swizzle) {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 1;
			swizzle_pipe[2] = 2;
			swizzle_pipe[3] = 3;
			swizzle_pipe[4] = 4;
			swizzle_pipe[5] = 5;
			swizzle_pipe[6] = 6;
			swizzle_pipe[7] = 7;
		} else {
			swizzle_pipe[0] = 0;
			swizzle_pipe[1] = 2;
			swizzle_pipe[2] = 4;
			swizzle_pipe[3] = 6;
			swizzle_pipe[4] = 1;
			swizzle_pipe[5] = 3;
			swizzle_pipe[6] = 5;
			swizzle_pipe[7] = 7;
		}
		break;
	}

	for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) {
		while (((1 << cur_backend) & enabled_backends_mask) == 0)
			cur_backend = (cur_backend + 1) % CAYMAN_MAX_BACKENDS;

		backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4)));

		cur_backend = (cur_backend + 1) % CAYMAN_MAX_BACKENDS;
	}

	return backend_map;
}
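
/*
 * cayman_program_channel_remap - program the MC channel remap and TCP
 * channel steering registers; at present every NOOFCHAN value takes
 * the default mapping.
 */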
static void cayman_program_channel_remap(struct radeon_device *rdev)
{
	u32 tcp_chan_steer_lo, tcp_chan_steer_hi, mc_shared_chremap, tmp;

	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	case 1:
	case 2:
	case 3:
	default:
		/* default mapping */
		mc_shared_chremap = 0x00fac688;
		break;
	}

	switch (rdev->family) {
	case CHIP_CAYMAN:
	default:
		//tcp_chan_steer_lo = 0x54763210
		tcp_chan_steer_lo = 0x76543210;
		tcp_chan_steer_hi = 0x0000ba98;
		break;
	}

	WREG32(TCP_CHAN_STEER_LO, tcp_chan_steer_lo);
	WREG32(TCP_CHAN_STEER_HI, tcp_chan_steer_hi);
	WREG32(MC_SHARED_CHREMAP, mc_shared_chremap);
}
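
/*
 * cayman_get_disable_mask_per_asic - widen a per-shader-engine backend
 * disable mask into a whole-ASIC mask by replicating it once per
 * shader engine.
 */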
static u32 cayman_get_disable_mask_per_asic(struct radeon_device *rdev,
					    u32 disable_mask_per_se,
					    u32 max_disable_mask_per_se,
					    u32 num_shader_engines)
{
	u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se);
	u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se;

	if (num_shader_engines == 1)
		return disable_mask_per_asic;
	else if (num_shader_engines == 2)
		return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se);
	else
		return 0xffffffff;
}
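
/*
 * cayman_gpu_init - golden-register setup for Cayman: fill in the
 * per-chip config limits, derive the pipe/backend layout, program
 * GB_ADDR_CONFIG and the custom tiling config dword, then set the HW
 * defaults for the 3D engine.
 */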
static void cayman_gpu_init(struct radeon_device *rdev)
{
	u32 cc_rb_backend_disable = 0;
	u32 cc_gc_shader_pipe_config;
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 gb_backend_map;
	u32 cgts_tcc_disable;
	u32 sx_debug_1;
	u32 smx_dc_ctl0;
	u32 gc_user_shader_pipe_config;
	u32 gc_user_rb_backend_disable;
	u32 cgts_user_tcc_disable;
	u32 cgts_sm_ctrl_reg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_CAYMAN:
	default:
		rdev->config.cayman.max_shader_engines = 2;
		rdev->config.cayman.max_pipes_per_simd = 4;
		rdev->config.cayman.max_tile_pipes = 8;
		rdev->config.cayman.max_simds_per_se = 12;
		rdev->config.cayman.max_backends_per_se = 4;
		rdev->config.cayman.max_texture_channel_caches = 8;
		rdev->config.cayman.max_gprs = 256;
		rdev->config.cayman.max_threads = 256;
		rdev->config.cayman.max_gs_threads = 32;
		rdev->config.cayman.max_stack_entries = 512;
		rdev->config.cayman.sx_num_of_sets = 8;
		rdev->config.cayman.sx_max_export_size = 256;
		rdev->config.cayman.sx_max_export_pos_size = 64;
		rdev->config.cayman.sx_max_export_smx_size = 192;
		rdev->config.cayman.max_hw_contexts = 8;
		rdev->config.cayman.sq_num_cf_insts = 2;
		rdev->config.cayman.sc_prim_fifo_size = 0x100;
		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE);
	cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG);
	cgts_tcc_disable = RREG32(CGTS_TCC_DISABLE);
	gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE);
	gc_user_shader_pipe_config = RREG32(GC_USER_SHADER_PIPE_CONFIG);
	cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE);

	rdev->config.cayman.num_shader_engines = rdev->config.cayman.max_shader_engines;
	tmp = ((~gc_user_shader_pipe_config) & INACTIVE_QD_PIPES_MASK) >> INACTIVE_QD_PIPES_SHIFT;
	rdev->config.cayman.num_shader_pipes_per_simd = r600_count_pipe_bits(tmp);
	rdev->config.cayman.num_tile_pipes = rdev->config.cayman.max_tile_pipes;
	tmp = ((~gc_user_shader_pipe_config) & INACTIVE_SIMDS_MASK) >> INACTIVE_SIMDS_SHIFT;
	rdev->config.cayman.num_simds_per_se = r600_count_pipe_bits(tmp);
	tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
	rdev->config.cayman.num_backends_per_se = r600_count_pipe_bits(tmp);
	tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT;
	rdev->config.cayman.backend_disable_mask_per_asic =
		cayman_get_disable_mask_per_asic(rdev, tmp, CAYMAN_MAX_BACKENDS_PER_SE_MASK,
						 rdev->config.cayman.num_shader_engines);
	rdev->config.cayman.backend_map =
		cayman_get_tile_pipe_to_backend_map(rdev, rdev->config.cayman.num_tile_pipes,
						    rdev->config.cayman.num_backends_per_se *
						    rdev->config.cayman.num_shader_engines,
						    &rdev->config.cayman.backend_disable_mask_per_asic,
						    rdev->config.cayman.num_shader_engines);
	tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT;
	rdev->config.cayman.num_texture_channel_caches = r600_count_pipe_bits(tmp);
	tmp = (mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT;
	rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
	if (rdev->config.cayman.mem_max_burst_length_bytes > 512)
		rdev->config.cayman.mem_max_burst_length_bytes = 512;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cayman.mem_row_size_in_kb > 4)
		rdev->config.cayman.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cayman.shader_engine_tile_size = 32;
	rdev->config.cayman.num_gpus = 1;
	rdev->config.cayman.multi_gpu_tile_size = 64;

	//gb_addr_config = 0x02011003
#if 0
	gb_addr_config = RREG32(GB_ADDR_CONFIG);
#else
	gb_addr_config = 0;
	switch (rdev->config.cayman.num_tile_pipes) {
	case 1:
	default:
		gb_addr_config |= NUM_PIPES(0);
		break;
	case 2:
		gb_addr_config |= NUM_PIPES(1);
		break;
	case 4:
		gb_addr_config |= NUM_PIPES(2);
		break;
	case 8:
		gb_addr_config |= NUM_PIPES(3);
		break;
	}

	tmp = (rdev->config.cayman.mem_max_burst_length_bytes / 256) - 1;
	gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp);
	gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.cayman.num_shader_engines - 1);
	tmp = (rdev->config.cayman.shader_engine_tile_size / 16) - 1;
	gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp);
	switch (rdev->config.cayman.num_gpus) {
	case 1:
	default:
		gb_addr_config |= NUM_GPUS(0);
		break;
	case 2:
		gb_addr_config |= NUM_GPUS(1);
		break;
	case 4:
		gb_addr_config |= NUM_GPUS(2);
		break;
	}
	switch (rdev->config.cayman.multi_gpu_tile_size) {
	case 16:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(0);
		break;
	case 32:
	default:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(1);
		break;
	case 64:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(2);
		break;
	case 128:
		gb_addr_config |= MULTI_GPU_TILE_SIZE(3);
		break;
	}
	switch (rdev->config.cayman.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}
#endif

	tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
	rdev->config.cayman.num_tile_pipes = (1 << tmp);
	tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
	rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
	tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
	rdev->config.cayman.num_shader_engines = tmp + 1;
	tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
	rdev->config.cayman.num_gpus = tmp + 1;
	tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
	rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
	tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
	rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;

	//gb_backend_map = 0x76541032;
#if 0
	gb_backend_map = RREG32(GB_BACKEND_MAP);
#else
	gb_backend_map =
		cayman_get_tile_pipe_to_backend_map(rdev, rdev->config.cayman.num_tile_pipes,
						    rdev->config.cayman.num_backends_per_se *
						    rdev->config.cayman.num_shader_engines,
						    &rdev->config.cayman.backend_disable_mask_per_asic,
						    rdev->config.cayman.num_shader_engines);
#endif
	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cayman.tile_config = 0;
	switch (rdev->config.cayman.num_tile_pipes) {
	case 1:
	default:
		rdev->config.cayman.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cayman.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cayman.tile_config |= (2 << 0);
		break;
	case 8:
		rdev->config.cayman.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cayman.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cayman.tile_config |=
		(gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
	rdev->config.cayman.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	WREG32(GB_BACKEND_MAP, gb_backend_map);
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);

	cayman_program_channel_remap(rdev);

	/* primary versions */
	WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(CC_GC_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
	WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
	WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);

	/* user versions */
	WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable);
	WREG32(GC_USER_SHADER_PIPE_CONFIG, cc_gc_shader_pipe_config);
	WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);

	/* reprogram the shader complex */
	cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
	for (i = 0; i < 16; i++)
		WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
	WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);
	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	sx_debug_1 = RREG32(SX_DEBUG_1);
	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
	WREG32(SX_DEBUG_1, sx_debug_1);

	/* these limits are set in the cayman branch of rdev->config above;
	 * the cayman union member must be read back, not evergreen */
	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
	smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
	smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
	WREG32(SMX_DC_CTL0, smx_dc_ctl0);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);

	/* need to be explicitly zero-ed */
	WREG32(VGT_OFFCHIP_LDS_BASE, 0);
	WREG32(SQ_LSTMP_RING_BASE, 0);
	WREG32(SQ_HSTMP_RING_BASE, 0);
	WREG32(SQ_ESTMP_RING_BASE, 0);
	WREG32(SQ_GSTMP_RING_BASE, 0);
	WREG32(SQ_VSTMP_RING_BASE, 0);
	WREG32(SQ_PSTMP_RING_BASE, 0);

	WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);

	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
					POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
					SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));

	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
				  FETCH_FIFO_HIWATER(0x4) |
				  DONE_FIFO_HIWATER(0xe0) |
				  ALU_UPDATE_FIFO_HIWATER(0x8)));
	WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));

	WREG32(SQ_CONFIG, (VC_ENABLE |
			   EXPORT_SRC_C |
			   GFX_PRIO(0) |
			   CS1_PRIO(0) |
			   CS2_PRIO(1)));

	WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	WREG32(CB_PERF_CTR0_SEL_0, 0);
	WREG32(CB_PERF_CTR0_SEL_1, 0);
	WREG32(CB_PERF_CTR1_SEL_0, 0);
	WREG32(CB_PERF_CTR1_SEL_1, 0);
	WREG32(CB_PERF_CTR2_SEL_0, 0);
	WREG32(CB_PERF_CTR2_SEL_1, 0);
	WREG32(CB_PERF_CTR3_SEL_0, 0);
	WREG32(CB_PERF_CTR3_SEL_1, 0);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	udelay(50);
}

/*
 * GART
 */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-7 are the VM contexts 0-7 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
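
/*
 * cayman_pcie_gart_enable - pin the GART page table in VRAM, program
 * the TLB and VM L2 cache control registers, point VM context0 at the
 * GART page table and flush the TLB.
 */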
int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
	int r;

	if (rdev->gart.table.vram.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
	       RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
	/* disable context1-7 */
	WREG32(VM_CONTEXT1_CNTL2, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);

	cayman_pcie_gart_tlb_flush(rdev);
	rdev->gart.ready = true;
	return 0;
}

void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
	int r;

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	if (rdev->gart.table.vram.robj) {
		r = radeon_bo_reserve(rdev->gart.table.vram.robj, false);
		if (likely(r == 0)) {
			radeon_bo_kunmap(rdev->gart.table.vram.robj);
			radeon_bo_unpin(rdev->gart.table.vram.robj);
			radeon_bo_unreserve(rdev->gart.table.vram.robj);
		}
	}
}

void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
	cayman_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}

/*
 * CP.
 */
static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
		WREG32(SCRATCH_UMSK, 0);
	}
}
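
/*
 * cayman_cp_load_microcode - halt the CP, then write the PFP and ME
 * ucode words into their respective instruction RAMs and reset the
 * ucode address pointers.
 */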
static int cayman_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	cayman_cp_enable(rdev, false);

	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
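
/*
 * cayman_cp_start - send the ME_INITIALIZE packet, enable the CP, then
 * emit the clear-state preamble and the cayman default register state
 * on ring 0.
 */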
static int cayman_cp_start(struct radeon_device *rdev)
{
	int r, i;

	r = radeon_ring_lock(rdev, 7);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(rdev, 0x1);
	radeon_ring_write(rdev, 0x0);
	radeon_ring_write(rdev, rdev->config.cayman.max_hw_contexts - 1);
	radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, 0);
	radeon_ring_unlock_commit(rdev);

	cayman_cp_enable(rdev, true);

	r = radeon_ring_lock(rdev, cayman_default_size + 15);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* setup clear context state */
	radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(rdev, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	for (i = 0; i < cayman_default_size; i++)
		radeon_ring_write(rdev, cayman_default_state[i]);

	radeon_ring_write(rdev, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(rdev, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(rdev, 0);

	/* SQ_VTX_BASE_VTX_LOC */
	radeon_ring_write(rdev, 0xc0026f00);
	radeon_ring_write(rdev, 0x00000000);
	radeon_ring_write(rdev, 0x00000000);
	radeon_ring_write(rdev, 0x00000000);

	/* Clear consts */
	radeon_ring_write(rdev, 0xc0036f00);
	radeon_ring_write(rdev, 0x00000bc4);
	radeon_ring_write(rdev, 0xffffffff);
	radeon_ring_write(rdev, 0xffffffff);
	radeon_ring_write(rdev, 0xffffffff);

	radeon_ring_unlock_commit(rdev);

	/* XXX init other rings */

	return 0;
}
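
/*
 * cayman_cp_resume - soft-reset the CP and its dependent blocks, then
 * program the ring buffer size, read/write pointers and writeback
 * addresses for rings 0-2 and restart them, ring-testing ring 0.
 */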
int cayman_cp_resume(struct radeon_device *rdev)
{
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_SH |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x4);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, (1 << 27));

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	rb_bufsz = drm_order(rdev->cp.ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	WREG32(CP_RB0_WPTR, 0);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, rdev->cp.gpu_addr >> 8);

	rdev->cp.rptr = RREG32(CP_RB0_RPTR);
	rdev->cp.wptr = RREG32(CP_RB0_WPTR);

	/* ring1 - compute only */
	/* Set ring buffer size */
	rb_bufsz = drm_order(rdev->cp1.ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	WREG32(CP_RB1_WPTR, 0);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, rdev->cp1.gpu_addr >> 8);

	rdev->cp1.rptr = RREG32(CP_RB1_RPTR);
	rdev->cp1.wptr = RREG32(CP_RB1_WPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	rb_bufsz = drm_order(rdev->cp2.ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	WREG32(CP_RB2_WPTR, 0);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, rdev->cp2.gpu_addr >> 8);

	rdev->cp2.rptr = RREG32(CP_RB2_RPTR);
	rdev->cp2.wptr = RREG32(CP_RB2_WPTR);

	/* start the rings */
	cayman_cp_start(rdev);
	rdev->cp.ready = true;
	rdev->cp1.ready = true;
	rdev->cp2.ready = true;
	/* this only tests cp0 */
	r = radeon_ring_test(rdev);
	if (r) {
		rdev->cp.ready = false;
		rdev->cp1.ready = false;
		rdev->cp2.ready = false;
		return r;
	}

	return 0;
}

bool cayman_gpu_is_lockup(struct radeon_device *rdev)
{
	u32 srbm_status;
	u32 grbm_status;
	u32 grbm_status_se0, grbm_status_se1;
	struct r100_gpu_lockup *lockup = &rdev->config.cayman.lockup;
	int r;

	srbm_status = RREG32(SRBM_STATUS);
	grbm_status = RREG32(GRBM_STATUS);
	grbm_status_se0 = RREG32(GRBM_STATUS_SE0);
	grbm_status_se1 = RREG32(GRBM_STATUS_SE1);
	if (!(grbm_status & GUI_ACTIVE)) {
		r100_gpu_lockup_update(lockup, &rdev->cp);
		return false;
	}
	/* force CP activities */
	r = radeon_ring_lock(rdev, 2);
	if (!r) {
		/* PACKET2 NOP */
		radeon_ring_write(rdev, 0x80000000);
		radeon_ring_write(rdev, 0x80000000);
		radeon_ring_unlock_commit(rdev);
	}
	/* XXX deal with CP0,1,2 */
	rdev->cp.rptr = RREG32(CP_RB0_RPTR);
	return r100_gpu_cp_is_lockup(rdev, lockup, &rdev->cp);
}
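
/*
 * cayman_gpu_soft_reset - if the GPU is busy, stop the MC, dump the
 * GRBM/SRBM status registers, soft-reset all the gfx blocks via
 * GRBM_SOFT_RESET, then restore the MC.
 */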
static int cayman_gpu_soft_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 grbm_reset = 0;

	if (!(RREG32(GRBM_STATUS) & GUI_ACTIVE))
		return 0;

	dev_info(rdev->dev, "GPU softreset\n");
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
	}
	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	/* reset all the gfx blocks */
	grbm_reset = (SOFT_RESET_CP |
		      SOFT_RESET_CB |
		      SOFT_RESET_DB |
		      SOFT_RESET_GDS |
		      SOFT_RESET_PA |
		      SOFT_RESET_SC |
		      SOFT_RESET_SPI |
		      SOFT_RESET_SH |
		      SOFT_RESET_SX |
		      SOFT_RESET_TC |
		      SOFT_RESET_TA |
		      SOFT_RESET_VGT |
		      SOFT_RESET_IA);

	dev_info(rdev->dev, "  GRBM_SOFT_RESET=0x%08X\n", grbm_reset);
	WREG32(GRBM_SOFT_RESET, grbm_reset);
	(void)RREG32(GRBM_SOFT_RESET);
	udelay(50);
	WREG32(GRBM_SOFT_RESET, 0);
	(void)RREG32(GRBM_SOFT_RESET);
	/* Wait a little for things to settle down */
	udelay(50);
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	evergreen_mc_resume(rdev, &save);
	return 0;
}

int cayman_asic_reset(struct radeon_device *rdev)
{
	return cayman_gpu_soft_reset(rdev);
}