/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/seq_file.h>
#include "drmP.h"
#include "drm.h"
#include "radeon_drm.h"
#include "radeon_microcode.h"
#include "radeon_reg.h"
#include "radeon.h"
/* This file gathers functions specific to:
 * r100, rv100, rs100, rv200, rs200, r200, rv250, rs300, rv280
 *
 * Some of these functions might be used by newer ASICs.
 */
void r100_hdp_reset(struct radeon_device *rdev);
void r100_gpu_init(struct radeon_device *rdev);
int r100_gui_wait_for_idle(struct radeon_device *rdev);
int r100_mc_wait_for_idle(struct radeon_device *rdev);
void r100_gpu_wait_for_vsync(struct radeon_device *rdev);
void r100_gpu_wait_for_vsync2(struct radeon_device *rdev);
int r100_debugfs_mc_info_init(struct radeon_device *rdev);
/*
 * PCI GART
 */
void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
{
	/* TODO: can we do something here? */
	/* The hardware seems to cache only one entry, so we should discard
	 * it; otherwise, if the first GPU GART read hits this stale entry,
	 * it could end up at the wrong address. */
}
int r100_pci_gart_enable(struct radeon_device *rdev)
{
	uint32_t tmp;
	int r;
	/* Initialize common gart structure */
	r = radeon_gart_init(rdev);
	if (r) {
		return r;
	}
	if (rdev->gart.table.ram.ptr == NULL) {
		rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
		r = radeon_gart_table_ram_alloc(rdev);
		if (r) {
			return r;
		}
	}
	/* discard memory request outside of configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp);
	/* set address range for PCI address translate */
	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_location);
	tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
	WREG32(RADEON_AIC_HI_ADDR, tmp);
	/* Enable bus mastering */
	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
	WREG32(RADEON_BUS_CNTL, tmp);
	/* set PCI GART page-table base address */
	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
	WREG32(RADEON_AIC_CNTL, tmp);
	r100_pci_gart_tlb_flush(rdev);
	rdev->gart.ready = true;
	return 0;
}
void r100_pci_gart_disable(struct radeon_device *rdev)
{
	uint32_t tmp;
	/* discard memory request outside of configured range */
	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
	WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
	WREG32(RADEON_AIC_LO_ADDR, 0);
	WREG32(RADEON_AIC_HI_ADDR, 0);
}
int r100_pci_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr)
{
	if (i < 0 || i >= rdev->gart.num_gpu_pages) {
		return -EINVAL;
	}
	rdev->gart.table.ram.ptr[i] = cpu_to_le32(lower_32_bits(addr));
	return 0;
}
int r100_gart_enable(struct radeon_device *rdev)
{
	if (rdev->flags & RADEON_IS_AGP) {
		r100_pci_gart_disable(rdev);
		return 0;
	}
	return r100_pci_gart_enable(rdev);
}
/*
 * MC
 */
void r100_mc_disable_clients(struct radeon_device *rdev)
{
	uint32_t ov0_scale_cntl, crtc_ext_cntl, crtc_gen_cntl, crtc2_gen_cntl;
	/* FIXME: is this function correct for rs100, rs200, rs300? */
	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	/* stop display and memory access */
	ov0_scale_cntl = RREG32(RADEON_OV0_SCALE_CNTL);
	WREG32(RADEON_OV0_SCALE_CNTL, ov0_scale_cntl & ~RADEON_SCALER_ENABLE);
	crtc_ext_cntl = RREG32(RADEON_CRTC_EXT_CNTL);
	WREG32(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl | RADEON_CRTC_DISPLAY_DIS);
	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
	r100_gpu_wait_for_vsync(rdev);
	WREG32(RADEON_CRTC_GEN_CNTL,
	       (crtc_gen_cntl & ~(RADEON_CRTC_CUR_EN | RADEON_CRTC_ICON_EN)) |
	       RADEON_CRTC_DISP_REQ_EN_B | RADEON_CRTC_EXT_DISP_EN);
	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
		crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
		r100_gpu_wait_for_vsync2(rdev);
		WREG32(RADEON_CRTC2_GEN_CNTL,
		       (crtc2_gen_cntl &
			~(RADEON_CRTC2_CUR_EN | RADEON_CRTC2_ICON_EN)) |
		       RADEON_CRTC2_DISP_REQ_EN_B);
	}
	udelay(500);
}
void r100_mc_setup(struct radeon_device *rdev)
{
	uint32_t tmp;
	int r;
	r = r100_debugfs_mc_info_init(rdev);
	if (r) {
		DRM_ERROR("Failed to register debugfs file for R100 MC !\n");
	}
	/* Write VRAM size in case we are limiting it */
	WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
	/* Novell bug 204882 for RN50/M6/M7 with 8/16/32MB VRAM:
	 * if the aperture is 64MB but we have 32MB VRAM
	 * we report only 32MB VRAM but we have to set MC_FB_LOCATION
	 * to 64MB, otherwise the gpu accidentally dies */
	tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1;
	tmp = REG_SET(RADEON_MC_FB_TOP, tmp >> 16);
	tmp |= REG_SET(RADEON_MC_FB_START, rdev->mc.vram_location >> 16);
	WREG32(RADEON_MC_FB_LOCATION, tmp);
	/* Enable bus mastering */
	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
	WREG32(RADEON_BUS_CNTL, tmp);
	if (rdev->flags & RADEON_IS_AGP) {
		tmp = rdev->mc.gtt_location + rdev->mc.gtt_size - 1;
		tmp = REG_SET(RADEON_MC_AGP_TOP, tmp >> 16);
		tmp |= REG_SET(RADEON_MC_AGP_START, rdev->mc.gtt_location >> 16);
		WREG32(RADEON_MC_AGP_LOCATION, tmp);
		WREG32(RADEON_AGP_BASE, rdev->mc.agp_base);
	} else {
		WREG32(RADEON_MC_AGP_LOCATION, 0x0FFFFFFF);
		WREG32(RADEON_AGP_BASE, 0);
	}
	tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
	tmp |= (7 << 28);
	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
	WREG32(RADEON_HOST_PATH_CNTL, tmp);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
}
int r100_mc_init(struct radeon_device *rdev)
{
	int r;
	if (r100_debugfs_rbbm_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for RBBM !\n");
	}
	r100_gpu_init(rdev);
	/* Disable gart, which also disables out-of-gart access */
	r100_pci_gart_disable(rdev);
	/* Setup GPU memory space */
	rdev->mc.gtt_location = 0xFFFFFFFFUL;
	if (rdev->flags & RADEON_IS_AGP) {
		r = radeon_agp_init(rdev);
		if (r) {
			printk(KERN_WARNING "[drm] Disabling AGP\n");
			rdev->flags &= ~RADEON_IS_AGP;
			rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
		} else {
			rdev->mc.gtt_location = rdev->mc.agp_base;
		}
	}
	r = radeon_mc_setup(rdev);
	if (r) {
		return r;
	}
	r100_mc_disable_clients(rdev);
	if (r100_mc_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait MC idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	r100_mc_setup(rdev);
	return 0;
}
void r100_mc_fini(struct radeon_device *rdev)
{
	r100_pci_gart_disable(rdev);
	radeon_gart_table_ram_free(rdev);
	radeon_gart_fini(rdev);
}
/*
 * Fence emission
 */
void r100_fence_ring_emit(struct radeon_device *rdev,
			  struct radeon_fence *fence)
{
	/* Whoever calls radeon_fence_emit should call ring_lock and ask
	 * for enough space (today the callers are the IB scheduler and
	 * buffer moves). */
	/* Wait until IDLE & CLEAN */
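	/* 0x1720 is the WAIT_UNTIL register; bits 16 and 17 request the
	 * 2D and 3D idle-and-clean waits (names per the radeon register
	 * headers). */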
	radeon_ring_write(rdev, PACKET0(0x1720, 0));
	radeon_ring_write(rdev, (1 << 16) | (1 << 17));
	/* Emit fence sequence & fire IRQ */
	radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
	radeon_ring_write(rdev, fence->seq);
	radeon_ring_write(rdev, PACKET0(RADEON_GEN_INT_STATUS, 0));
	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
}
/*
 * Writeback
 */
int r100_wb_init(struct radeon_device *rdev)
{
	int r;
	if (rdev->wb.wb_obj == NULL) {
		r = radeon_object_create(rdev, NULL, 4096,
					 true,
					 RADEON_GEM_DOMAIN_GTT,
					 false, &rdev->wb.wb_obj);
		if (r) {
			DRM_ERROR("radeon: failed to create WB buffer (%d).\n", r);
			return r;
		}
		r = radeon_object_pin(rdev->wb.wb_obj,
				      RADEON_GEM_DOMAIN_GTT,
				      &rdev->wb.gpu_addr);
		if (r) {
			DRM_ERROR("radeon: failed to pin WB buffer (%d).\n", r);
			return r;
		}
		r = radeon_object_kmap(rdev->wb.wb_obj, (void **)&rdev->wb.wb);
		if (r) {
			DRM_ERROR("radeon: failed to map WB buffer (%d).\n", r);
			return r;
		}
	}
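	/* Register names are an assumption based on the radeon register
	 * headers: 0x774 = SCRATCH_ADDR, 0x70C = CP_RB_RPTR_ADDR,
	 * 0x770 = SCRATCH_UMSK. */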
	WREG32(0x774, rdev->wb.gpu_addr);
	WREG32(0x70C, rdev->wb.gpu_addr + 1024);
	WREG32(0x770, 0xff);
	return 0;
}
void r100_wb_fini(struct radeon_device *rdev)
{
	if (rdev->wb.wb_obj) {
		radeon_object_kunmap(rdev->wb.wb_obj);
		radeon_object_unpin(rdev->wb.wb_obj);
		radeon_object_unref(&rdev->wb.wb_obj);
		rdev->wb.wb = NULL;
		rdev->wb.wb_obj = NULL;
	}
}
int r100_copy_blit(struct radeon_device *rdev,
		   uint64_t src_offset,
		   uint64_t dst_offset,
		   unsigned num_pages,
		   struct radeon_fence *fence)
{
	uint32_t cur_pages;
	uint32_t stride_bytes = PAGE_SIZE;
	uint32_t pitch;
	uint32_t stride_pixels;
	unsigned ndw;
	int num_loops;
	int r = 0;
	/* radeon is limited to a 16k stride */
	stride_bytes &= 0x3fff;
	/* radeon pitch is expressed in units of 64 bytes */
	pitch = stride_bytes / 64;
	stride_pixels = stride_bytes / 4;
	num_loops = DIV_ROUND_UP(num_pages, 8191);
	/* Ask for enough room for blit + flush + fence */
	ndw = 64 + (10 * num_loops);
	r = radeon_ring_lock(rdev, ndw);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
		return -EINVAL;
	}
	while (num_pages > 0) {
		cur_pages = num_pages;
		if (cur_pages > 8191) {
			cur_pages = 8191;
		}
		num_pages -= cur_pages;
		/* pages run in the Y direction (height); the page width
		 * runs in the X direction (width) */
		radeon_ring_write(rdev, PACKET3(PACKET3_BITBLT_MULTI, 8));
		radeon_ring_write(rdev,
				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
				  RADEON_GMC_SRC_CLIPPING |
				  RADEON_GMC_DST_CLIPPING |
				  RADEON_GMC_BRUSH_NONE |
				  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
				  RADEON_GMC_SRC_DATATYPE_COLOR |
				  RADEON_ROP3_S |
				  RADEON_DP_SRC_SOURCE_MEMORY |
				  RADEON_GMC_CLR_CMP_CNTL_DIS |
				  RADEON_GMC_WR_MSK_DIS);
		radeon_ring_write(rdev, (pitch << 22) | (src_offset >> 10));
		radeon_ring_write(rdev, (pitch << 22) | (dst_offset >> 10));
		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(rdev, 0);
		radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
		radeon_ring_write(rdev, num_pages);
		radeon_ring_write(rdev, num_pages);
		radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
	}
	radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
	radeon_ring_write(rdev, RADEON_RB2D_DC_FLUSH_ALL);
	radeon_ring_write(rdev, PACKET0(RADEON_WAIT_UNTIL, 0));
	radeon_ring_write(rdev,
			  RADEON_WAIT_2D_IDLECLEAN |
			  RADEON_WAIT_HOST_IDLECLEAN |
			  RADEON_WAIT_DMA_GUI_IDLE);
	if (fence) {
		r = radeon_fence_emit(rdev, fence);
	}
	radeon_ring_unlock_commit(rdev);
	return r;
}
/*
 * CP
 */
void r100_ring_start(struct radeon_device *rdev)
{
	int r;
	r = radeon_ring_lock(rdev, 2);
	if (r) {
		return;
	}
	radeon_ring_write(rdev, PACKET0(RADEON_ISYNC_CNTL, 0));
	radeon_ring_write(rdev,
			  RADEON_ISYNC_ANY2D_IDLE3D |
			  RADEON_ISYNC_ANY3D_IDLE2D |
			  RADEON_ISYNC_WAIT_IDLEGUI |
			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
	radeon_ring_unlock_commit(rdev);
}
static void r100_cp_load_microcode(struct radeon_device *rdev)
{
	int i;
	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
	WREG32(RADEON_CP_ME_RAM_ADDR, 0);
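	/* Each family's microcode below is 256 (high dword, low dword)
	 * pairs streamed into the CP microengine RAM. */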
	if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		DRM_INFO("Loading R100 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R100_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R100_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_R200) ||
		   (rdev->family == CHIP_RV250) ||
		   (rdev->family == CHIP_RV280) ||
		   (rdev->family == CHIP_RS300)) {
		DRM_INFO("Loading R200 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R200_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R200_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_R300) ||
		   (rdev->family == CHIP_R350) ||
		   (rdev->family == CHIP_RV350) ||
		   (rdev->family == CHIP_RV380) ||
		   (rdev->family == CHIP_RS400) ||
		   (rdev->family == CHIP_RS480)) {
		DRM_INFO("Loading R300 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R300_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R300_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_R420) ||
		   (rdev->family == CHIP_R423) ||
		   (rdev->family == CHIP_RV410)) {
		DRM_INFO("Loading R400 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R420_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R420_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_RS690) ||
		   (rdev->family == CHIP_RS740)) {
		DRM_INFO("Loading RS690/RS740 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, RS690_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, RS690_cp_microcode[i][0]);
		}
	} else if (rdev->family == CHIP_RS600) {
		DRM_INFO("Loading RS600 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, RS600_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, RS600_cp_microcode[i][0]);
		}
	} else if ((rdev->family == CHIP_RV515) ||
		   (rdev->family == CHIP_R520) ||
		   (rdev->family == CHIP_RV530) ||
		   (rdev->family == CHIP_R580) ||
		   (rdev->family == CHIP_RV560) ||
		   (rdev->family == CHIP_RV570)) {
		DRM_INFO("Loading R500 Microcode\n");
		for (i = 0; i < 256; i++) {
			WREG32(RADEON_CP_ME_RAM_DATAH, R520_cp_microcode[i][1]);
			WREG32(RADEON_CP_ME_RAM_DATAL, R520_cp_microcode[i][0]);
		}
	}
}
int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
{
	unsigned rb_bufsz;
	unsigned rb_blksz;
	unsigned max_fetch;
	unsigned pre_write_timer;
	unsigned pre_write_limit;
	unsigned indirect2_start;
	unsigned indirect1_start;
	uint32_t tmp;
	int r;
	if (r100_debugfs_cp_init(rdev)) {
		DRM_ERROR("Failed to register debugfs file for CP !\n");
	}
	/* Reset CP */
	tmp = RREG32(RADEON_CP_CSQ_STAT);
	if (tmp & (1 << 31)) {
		DRM_INFO("radeon: cp busy (0x%08X) resetting\n", tmp);
		WREG32(RADEON_CP_CSQ_MODE, 0);
		WREG32(RADEON_CP_CSQ_CNTL, 0);
		WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
		tmp = RREG32(RADEON_RBBM_SOFT_RESET);
		mdelay(2);
		WREG32(RADEON_RBBM_SOFT_RESET, 0);
		tmp = RREG32(RADEON_RBBM_SOFT_RESET);
		mdelay(2);
		tmp = RREG32(RADEON_CP_CSQ_STAT);
		if (tmp & (1 << 31)) {
			DRM_INFO("radeon: cp reset failed (0x%08X)\n", tmp);
		}
	} else {
		DRM_INFO("radeon: cp idle (0x%08X)\n", tmp);
	}
	/* Align ring size */
	rb_bufsz = drm_order(ring_size / 8);
	ring_size = (1 << (rb_bufsz + 1)) * 4;
	r100_cp_load_microcode(rdev);
	r = radeon_ring_init(rdev, ring_size);
	if (r) {
		return r;
	}
	/* Update the rptr copy in system RAM each time the CP reads
	 * 1024 bytes (16 dword/quadword). */
	rb_blksz = 9;
	/* the CP will read 128 bytes at a time (4 dwords) */
	max_fetch = 1;
	rdev->cp.align_mask = 16 - 1;
	/* Writes to CP_RB_WPTR are delayed for pre_write_timer clocks */
	pre_write_timer = 64;
	/* Force a CP_RB_WPTR write if it is written more than once before
	 * the delay expires.
	 */
	pre_write_limit = 0;
	/* Set up the CP cache like this (cache size is 96 dwords):
	 *	RING		0 to 15
	 *	INDIRECT1	16 to 79
	 *	INDIRECT2	80 to 95
	 * So the ring cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)),
	 * the indirect1 cache size is 64 dwords (> (2 * max_fetch = 2 * 4 dwords)),
	 * and the indirect2 cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)).
	 * The idea is that most GPU commands go through the indirect1
	 * buffer, so it gets the bigger cache.
	 */
	indirect2_start = 80;
	indirect1_start = 16;
	/* cp setup */
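	/* 0x718 is CP_RB_WPTR_DELAY per the pre_write_timer comment above;
	 * the register name is an assumption based on the register headers. */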
	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
	WREG32(RADEON_CP_RB_CNTL,
#ifdef __BIG_ENDIAN
	       RADEON_BUF_SWAP_32BIT |
#endif
	       REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
	       REG_SET(RADEON_MAX_FETCH, max_fetch) |
	       RADEON_RB_NO_UPDATE);
	/* Set ring address */
	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
	WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
	/* Force read & write ptr to 0 */
	tmp = RREG32(RADEON_CP_RB_CNTL);
	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
	WREG32(RADEON_CP_RB_RPTR_WR, 0);
	WREG32(RADEON_CP_RB_WPTR, 0);
	WREG32(RADEON_CP_RB_CNTL, tmp);
	udelay(10);
	rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
	rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
	/* Set cp mode to bus mastering & enable cp */
	WREG32(RADEON_CP_CSQ_MODE,
	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
	WREG32(0x718, 0);
	WREG32(0x744, 0x00004D4D);
	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
	radeon_ring_start(rdev);
	r = radeon_ring_test(rdev);
	if (r) {
		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
		return r;
	}
	rdev->cp.ready = true;
	return 0;
}
void r100_cp_fini(struct radeon_device *rdev)
{
	/* Disable ring */
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	radeon_ring_fini(rdev);
	DRM_INFO("radeon: cp finalized\n");
}
void r100_cp_disable(struct radeon_device *rdev)
{
	/* Disable ring */
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	if (r100_gui_wait_for_idle(rdev)) {
		printk(KERN_WARNING "Failed to wait GUI idle while "
		       "programming pipes. Bad things might happen.\n");
	}
}
int r100_cp_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	bool reinit_cp;
	int i;
	reinit_cp = rdev->cp.ready;
	rdev->cp.ready = false;
	WREG32(RADEON_CP_CSQ_MODE, 0);
	WREG32(RADEON_CP_CSQ_CNTL, 0);
	WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
	(void)RREG32(RADEON_RBBM_SOFT_RESET);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	/* Wait to prevent race in RBBM_STATUS */
	mdelay(1);
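	/* RBBM_STATUS bit 16 reflects CP activity; wait for it to clear
	 * before declaring the reset successful. */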
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 16))) {
			DRM_INFO("CP reset succeeded (RBBM_STATUS=0x%08X)\n",
				 tmp);
			if (reinit_cp) {
				return r100_cp_init(rdev, rdev->cp.ring_size);
			}
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset CP (RBBM_STATUS=0x%08X)!\n", tmp);
	return -1;
}
/*
 * CS functions
 */
int r100_cs_parse_packet0(struct radeon_cs_parser *p,
			  struct radeon_cs_packet *pkt,
			  const unsigned *auth, unsigned n,
			  radeon_packet0_check_t check)
{
	unsigned reg;
	unsigned i, j, m;
	unsigned idx;
	int r;
	idx = pkt->idx + 1;
	reg = pkt->reg;
	/* Check that the register falls into the register range
	 * covered by the number of entries (n) in the
	 * safe-register bitmap.
	 */
	if (pkt->one_reg_wr) {
		if ((reg >> 7) > n) {
			return -EINVAL;
		}
	} else {
		if (((reg + (pkt->count << 2)) >> 7) > n) {
			return -EINVAL;
		}
	}
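	/* auth is a bitmap with one bit per register: the word index is
	 * reg >> 7 (32 registers of 4 bytes per word) and the bit index
	 * is (reg >> 2) & 31. */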
	for (i = 0; i <= pkt->count; i++, idx++) {
		j = (reg >> 7);
		m = 1 << ((reg >> 2) & 31);
		if (auth[j] & m) {
			r = check(p, pkt, idx, reg);
			if (r) {
				return r;
			}
		}
		if (pkt->one_reg_wr) {
			if (!(auth[j] & m)) {
				break;
			}
		} else {
			reg += 4;
		}
	}
	return 0;
}
void r100_cs_dump_packet(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;
	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++) {
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
	}
}
/**
 * r100_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @parser: parser structure holding parsing context.
 * @pkt: where to store packet information
 *
 * Assumes that chunk_ib_index is properly set. Returns -EINVAL if the
 * packet is bigger than the remaining ib size or if the packet type is
 * unknown.
 **/
int r100_cs_packet_parse(struct radeon_cs_parser *p,
			 struct radeon_cs_packet *pkt,
			 unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	uint32_t header;
	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = ib_chunk->kdata[idx];
	pkt->idx = idx;
	pkt->type = CP_PACKET_GET_TYPE(header);
	pkt->count = CP_PACKET_GET_COUNT(header);
	switch (pkt->type) {
	case PACKET_TYPE0:
		pkt->reg = CP_PACKET0_GET_REG(header);
		pkt->one_reg_wr = CP_PACKET0_GET_ONE_REG_WR(header);
		break;
	case PACKET_TYPE3:
		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
		break;
	case PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}
/**
 * r100_cs_packet_parse_vline() - parse userspace VLINE packet
 * @parser: parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET0 - WAIT_UNTIL + value
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT UNTIL packets to the correct crtc.
 * It also detects a switched-off crtc and nulls out the
 * wait in that case.
 */
int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct radeon_cs_chunk *ib_chunk;
	struct drm_mode_object *obj;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, waitreloc;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	/* parse the wait until */
	r = r100_cs_packet_parse(p, &waitreloc, p->idx);
	if (r)
		return r;
	/* check it's a WAIT_UNTIL writing a single register */
	if (waitreloc.reg != RADEON_WAIT_UNTIL ||
	    waitreloc.count != 0) {
		DRM_ERROR("vline wait had illegal wait until segment\n");
		r = -EINVAL;
		return r;
	}
	if (ib_chunk->kdata[waitreloc.idx + 1] != RADEON_WAIT_CRTC_VLINE) {
		DRM_ERROR("vline wait had illegal wait until\n");
		r = -EINVAL;
		return r;
	}
	/* jump over the NOP */
	r = r100_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
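	/* Relative to h_idx the sequence parsed above is laid out as:
	 * +0 VLINE_START_END header, +1 its value,
	 * +2 WAIT_UNTIL header, +3 its value,
	 * +4 relocation packet3 header, +5 crtc_id. */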
	h_idx = p->idx - 2;
	p->idx += waitreloc.count;
	p->idx += p3reloc.count;
	header = ib_chunk->kdata[h_idx];
	crtc_id = ib_chunk->kdata[h_idx + 5];
	reg = ib_chunk->kdata[h_idx] >> 2;
	mutex_lock(&p->rdev->ddev->mode_config.mutex);
	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
	if (!obj) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		r = -EINVAL;
		goto out;
	}
	crtc = obj_to_crtc(obj);
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;
	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the wait until */
		ib_chunk->kdata[h_idx + 2] = PACKET2(0);
		ib_chunk->kdata[h_idx + 3] = PACKET2(0);
	} else if (crtc_id == 1) {
		switch (reg) {
		case AVIVO_D1MODE_VLINE_START_END:
			header &= R300_CP_PACKET0_REG_MASK;
			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
			break;
		case RADEON_CRTC_GUI_TRIG_VLINE:
			header &= R300_CP_PACKET0_REG_MASK;
			header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			r = -EINVAL;
			goto out;
		}
		ib_chunk->kdata[h_idx] = header;
		ib_chunk->kdata[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
	}
out:
	mutex_unlock(&p->rdev->ddev->mode_config.mutex);
	return r;
}
/**
 * r100_cs_packet_next_reloc() - parse next packet which should be a reloc packet3
 * @parser: parser structure holding parsing context.
 * @data: pointer to relocation data
 * @offset_start: starting offset
 * @offset_mask: offset mask (to align start offset on)
 * @reloc: reloc information
 *
 * Checks that the next packet is a relocation packet3, does BO validation
 * and computes the GPU offset using the provided start.
 **/
int r100_cs_packet_next_reloc(struct radeon_cs_parser *p,
			      struct radeon_cs_reloc **cs_reloc)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;
	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = r100_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return r;
	}
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		r100_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = ib_chunk->kdata[p3reloc.idx + 1];
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		r100_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}
static int r100_packet0_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	volatile uint32_t *ib;
	uint32_t tmp;
	unsigned reg;
	unsigned i;
	unsigned idx;
	bool onereg;
	int r;
	u32 tile_flags = 0;
	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	reg = pkt->reg;
	onereg = false;
	if (CP_PACKET0_GET_ONE_REG_WR(ib_chunk->kdata[pkt->idx])) {
		onereg = true;
	}
	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
		switch (reg) {
		case RADEON_CRTC_GUI_TRIG_VLINE:
			r = r100_cs_packet_parse_vline(p);
			if (r) {
				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					  idx, reg);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			break;
		/* FIXME: only allow PACKET3 blit? easier to check for out of
		 * range access */
		case RADEON_DST_PITCH_OFFSET:
		case RADEON_SRC_PITCH_OFFSET:
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					  idx, reg);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
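			/* kdata[idx] bits [21:0] hold the offset in 1KB
			 * units and bits [29:22] the pitch; patch the
			 * offset with the relocated GPU address. */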
			tmp = ib_chunk->kdata[idx] & 0x003fffff;
			tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
				tile_flags |= RADEON_DST_TILE_MACRO;
			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
				if (reg == RADEON_SRC_PITCH_OFFSET) {
					DRM_ERROR("Cannot src blit from microtiled surface\n");
					r100_cs_dump_packet(p, pkt);
					return -EINVAL;
				}
				tile_flags |= RADEON_DST_TILE_MICRO;
			}
			tmp |= tile_flags;
			ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
			break;
		case RADEON_RB3D_DEPTHOFFSET:
		case RADEON_RB3D_COLOROFFSET:
		case R300_RB3D_COLOROFFSET0:
		case R300_ZB_DEPTHOFFSET:
		case R200_PP_TXOFFSET_0:
		case R200_PP_TXOFFSET_1:
		case R200_PP_TXOFFSET_2:
		case R200_PP_TXOFFSET_3:
		case R200_PP_TXOFFSET_4:
		case R200_PP_TXOFFSET_5:
		case RADEON_PP_TXOFFSET_0:
		case RADEON_PP_TXOFFSET_1:
		case RADEON_PP_TXOFFSET_2:
		case R300_TX_OFFSET_0:
		case R300_TX_OFFSET_0+4:
		case R300_TX_OFFSET_0+8:
		case R300_TX_OFFSET_0+12:
		case R300_TX_OFFSET_0+16:
		case R300_TX_OFFSET_0+20:
		case R300_TX_OFFSET_0+24:
		case R300_TX_OFFSET_0+28:
		case R300_TX_OFFSET_0+32:
		case R300_TX_OFFSET_0+36:
		case R300_TX_OFFSET_0+40:
		case R300_TX_OFFSET_0+44:
		case R300_TX_OFFSET_0+48:
		case R300_TX_OFFSET_0+52:
		case R300_TX_OFFSET_0+56:
		case R300_TX_OFFSET_0+60:
			/* rn50 has no 3D engine so fail on any 3d setup */
			if (ASIC_IS_RN50(p->rdev)) {
				DRM_ERROR("attempt to use RN50 3D engine failed\n");
				return -EINVAL;
			}
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					  idx, reg);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
			break;
		case R300_RB3D_COLORPITCH0:
		case RADEON_RB3D_COLORPITCH:
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					  idx, reg);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
				tile_flags |= RADEON_COLOR_TILE_ENABLE;
			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
				tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
			tmp = ib_chunk->kdata[idx] & ~(0x7 << 16);
			tmp |= tile_flags;
			ib[idx] = tmp;
			break;
		default:
			/* FIXME: we don't want to allow any other packets */
			break;
		}
		if (onereg) {
			/* FIXME: forbid onereg writes to relocated registers */
			break;
		}
	}
	return 0;
}
int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
					 struct radeon_cs_packet *pkt,
					 struct radeon_object *robj)
{
	struct radeon_cs_chunk *ib_chunk;
	unsigned idx;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	if ((ib_chunk->kdata[idx+2] + 1) > radeon_object_size(robj)) {
		DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
			  "(need %u have %lu) !\n",
			  ib_chunk->kdata[idx+2] + 1,
			  radeon_object_size(robj));
		return -EINVAL;
	}
	return 0;
}
static int r100_packet3_check(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_cs_reloc *reloc;
	unsigned idx;
	unsigned i, c;
	volatile uint32_t *ib;
	int r;
	ib = p->ib->ptr;
	ib_chunk = &p->chunks[p->chunk_ib_idx];
	idx = pkt->idx + 1;
	switch (pkt->opcode) {
	case PACKET3_3D_LOAD_VBPNTR:
		c = ib_chunk->kdata[idx++];
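		/* The first dword holds the array count; arrays are then
		 * described in pairs (two relocations every three dwords),
		 * with a possible odd trailing array handled afterwards. */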
		for (i = 0; i < (c - 1); i += 2, idx += 3) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+2] = ib_chunk->kdata[idx+2] + ((u32)reloc->lobj.gpu_offset);
		}
		if (c & 1) {
			r = r100_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("No reloc for packet3 %d\n",
					  pkt->opcode);
				r100_cs_dump_packet(p, pkt);
				return r;
			}
			ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
		}
		break;
	case PACKET3_INDX_BUFFER:
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx+1] = ib_chunk->kdata[idx+1] + ((u32)reloc->lobj.gpu_offset);
		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
		if (r) {
			return r;
		}
		break;
	case 0x23:
		/* FIXME: cleanup */
		/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
		r = r100_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
			r100_cs_dump_packet(p, pkt);
			return r;
		}
		ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset);
		break;
	case PACKET3_3D_DRAW_IMMD:
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_IMMD_2:
		/* triggers drawing using in-packet vertex data */
	case PACKET3_3D_DRAW_VBUF_2:
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX_2:
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_3D_DRAW_VBUF:
		/* triggers drawing of vertex buffers setup elsewhere */
	case PACKET3_3D_DRAW_INDX:
		/* triggers drawing using indices to vertex buffer */
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
int r100_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	int r;
	do {
		r = r100_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			return r;
		}
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case PACKET_TYPE0:
			r = r100_packet0_check(p, &pkt);
			break;
		case PACKET_TYPE2:
			break;
		case PACKET_TYPE3:
			r = r100_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n",
				  pkt.type);
			return -EINVAL;
		}
		if (r) {
			return r;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
	return 0;
}
/*
 * Global GPU functions
 */
void r100_errata(struct radeon_device *rdev)
{
	rdev->pll_errata = 0;
	if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
	}
	if (rdev->family == CHIP_RV100 ||
	    rdev->family == CHIP_RS100 ||
	    rdev->family == CHIP_RS200) {
		rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
	}
}
/* Wait for vertical sync on primary CRTC */
void r100_gpu_wait_for_vsync(struct radeon_device *rdev)
{
	uint32_t crtc_gen_cntl, tmp;
	int i;
	crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL);
	if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) ||
	    !(crtc_gen_cntl & RADEON_CRTC_EN)) {
		return;
	}
	/* Clear the CRTC_VBLANK_SAVE bit */
	WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_CRTC_STATUS);
		if (tmp & RADEON_CRTC_VBLANK_SAVE) {
			return;
		}
		DRM_UDELAY(1);
	}
}
/* Wait for vertical sync on secondary CRTC */
void r100_gpu_wait_for_vsync2(struct radeon_device *rdev)
{
	uint32_t crtc2_gen_cntl, tmp;
	int i;
	crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL);
	if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) ||
	    !(crtc2_gen_cntl & RADEON_CRTC2_EN))
		return;
	/* Clear the CRTC_VBLANK_SAVE bit */
	WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR);
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_CRTC2_STATUS);
		if (tmp & RADEON_CRTC2_VBLANK_SAVE) {
			return;
		}
		DRM_UDELAY(1);
	}
}
int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
{
	unsigned i;
	uint32_t tmp;
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
		if (tmp >= n) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}
int r100_gui_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;
	if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
		printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !"
		       " Bad things might happen.\n");
	}
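	/* RBBM_STATUS bit 31 is the global GUI-active flag; the engine is
	 * idle once it clears. */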
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 31))) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}
int r100_mc_wait_for_idle(struct radeon_device *rdev)
{
	unsigned i;
	uint32_t tmp;
	for (i = 0; i < rdev->usec_timeout; i++) {
		/* read MC_STATUS */
		tmp = RREG32(0x0150);
		if (tmp & (1 << 2)) {
			return 0;
		}
		DRM_UDELAY(1);
	}
	return -1;
}
void r100_gpu_init(struct radeon_device *rdev)
{
	/* TODO: anything to do here? pipes? */
	r100_hdp_reset(rdev);
}
void r100_hdp_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	tmp = RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL;
	tmp |= (7 << 28);
	WREG32(RADEON_HOST_PATH_CNTL, tmp | RADEON_HDP_SOFT_RESET | RADEON_HDP_READ_BUFFER_INVALIDATE);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	WREG32(RADEON_HOST_PATH_CNTL, tmp);
	(void)RREG32(RADEON_HOST_PATH_CNTL);
}
int r100_rb2d_reset(struct radeon_device *rdev)
{
	uint32_t tmp;
	int i;
	WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_E2);
	(void)RREG32(RADEON_RBBM_SOFT_RESET);
	udelay(200);
	WREG32(RADEON_RBBM_SOFT_RESET, 0);
	/* Wait to prevent race in RBBM_STATUS */
	mdelay(1);
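	/* RBBM_STATUS bit 26 tracks the engine reset above (E2, the 2D
	 * block); wait for it to clear. */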
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(RADEON_RBBM_STATUS);
		if (!(tmp & (1 << 26))) {
			DRM_INFO("RB2D reset succeeded (RBBM_STATUS=0x%08X)\n",
				 tmp);
			return 0;
		}
		DRM_UDELAY(1);
	}
	tmp = RREG32(RADEON_RBBM_STATUS);
	DRM_ERROR("Failed to reset RB2D (RBBM_STATUS=0x%08X)!\n", tmp);
	return -1;
}
int r100_gpu_reset(struct radeon_device *rdev)
{
	uint32_t status;
	/* the reset order likely matters */
	status = RREG32(RADEON_RBBM_STATUS);
	/* reset HDP */
	r100_hdp_reset(rdev);
	/* reset rb2d */
	if (status & ((1 << 17) | (1 << 18) | (1 << 27))) {
		r100_rb2d_reset(rdev);
	}
	/* TODO: reset 3D engine */
	/* reset CP */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 16)) {
		r100_cp_reset(rdev);
	}
	/* Check if GPU is idle */
	status = RREG32(RADEON_RBBM_STATUS);
	if (status & (1 << 31)) {
		DRM_ERROR("Failed to reset GPU (RBBM_STATUS=0x%08X)\n", status);
		return -1;
	}
	DRM_INFO("GPU reset succeeded (RBBM_STATUS=0x%08X)\n", status);
	return 0;
}
/*
 * VRAM info
 */
static void r100_vram_get_type(struct radeon_device *rdev)
{
	uint32_t tmp;
	rdev->mc.vram_is_ddr = false;
	if (rdev->flags & RADEON_IS_IGP)
		rdev->mc.vram_is_ddr = true;
	else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
		rdev->mc.vram_is_ddr = true;
	if ((rdev->family == CHIP_RV100) ||
	    (rdev->family == CHIP_RS100) ||
	    (rdev->family == CHIP_RS200)) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RV100_HALF_MODE) {
			rdev->mc.vram_width = 32;
		} else {
			rdev->mc.vram_width = 64;
		}
		if (rdev->flags & RADEON_SINGLE_CRTC) {
			rdev->mc.vram_width /= 4;
			rdev->mc.vram_is_ddr = true;
		}
	} else if (rdev->family <= CHIP_RV280) {
		tmp = RREG32(RADEON_MEM_CNTL);
		if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
			rdev->mc.vram_width = 128;
		} else {
			rdev->mc.vram_width = 64;
		}
	} else {
		/* newer IGPs */
		rdev->mc.vram_width = 128;
	}
}
static u32 r100_get_accessible_vram(struct radeon_device *rdev)
{
	u32 aper_size;
	u8 byte;
	aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
	/* Set HDP_APER_CNTL only on cards that are known not to be broken,
	 * that is, those with the 2nd generation multifunction PCI interface
	 */
	if (rdev->family == CHIP_RV280 ||
	    rdev->family >= CHIP_RV350) {
		WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
			 ~RADEON_HDP_APER_CNTL);
		DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
		return aper_size * 2;
	}
	/* Older cards have all sorts of funny issues to deal with. First
	 * check if it's a multifunction card by reading the PCI config
	 * header type... Limit those to one aperture size
	 */
	pci_read_config_byte(rdev->pdev, 0xe, &byte);
	if (byte & 0x80) {
		DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
		DRM_INFO("Limiting VRAM to one aperture\n");
		return aper_size;
	}
	/* Single function older card. We read HDP_APER_CNTL to see how the
	 * BIOS has set it up. We don't write this as it's broken on some
	 * ASICs, but we expect the BIOS to have done the right thing (might
	 * be too optimistic...)
	 */
	if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
		return aper_size * 2;
	return aper_size;
}
void r100_vram_init_sizes(struct radeon_device *rdev)
{
	u64 config_aper_size;
	u32 accessible;
	config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
	if (rdev->flags & RADEON_IS_IGP) {
		uint32_t tom;
		/* read NB_TOM to get the amount of ram stolen for the GPU */
		tom = RREG32(RADEON_NB_TOM);
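		/* NB_TOM packs the stolen range in 64KB units:
		 * bits [31:16] hold the top, bits [15:0] the bottom. */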
		rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
		/* for IGPs we need to keep VRAM where it was put by the BIOS */
		rdev->mc.vram_location = (tom & 0xffff) << 16;
		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
		rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
	} else {
		rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
		/* Some production M6 boards report 0 when 8 MB is
		 * actually installed.
		 */
		if (rdev->mc.real_vram_size == 0) {
			rdev->mc.real_vram_size = 8192 * 1024;
			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
		}
		/* let the driver place VRAM */
		rdev->mc.vram_location = 0xFFFFFFFFUL;
		/* Fix for RN50, M6, M7 with 8/16/32(??) MB of VRAM -
		 * Novell bug 204882, along with lots of Ubuntu ones */
		if (config_aper_size > rdev->mc.real_vram_size)
			rdev->mc.mc_vram_size = config_aper_size;
		else
			rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
	}
	/* work out accessible VRAM */
	accessible = r100_get_accessible_vram(rdev);
	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
	if (accessible > rdev->mc.aper_size)
		accessible = rdev->mc.aper_size;
	if (rdev->mc.mc_vram_size > rdev->mc.aper_size)
		rdev->mc.mc_vram_size = rdev->mc.aper_size;
	if (rdev->mc.real_vram_size > rdev->mc.aper_size)
		rdev->mc.real_vram_size = rdev->mc.aper_size;
}
void r100_vram_info(struct radeon_device *rdev)
{
	r100_vram_get_type(rdev);
	r100_vram_init_sizes(rdev);
}
/*
 * Indirect registers accessors
 */
void r100_pll_errata_after_index(struct radeon_device *rdev)
{
	if (!(rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS)) {
		return;
	}
	(void)RREG32(RADEON_CLOCK_CNTL_DATA);
	(void)RREG32(RADEON_CRTC_GEN_CNTL);
}
  1389. static void r100_pll_errata_after_data(struct radeon_device *rdev)
  1390. {
  1391. /* This workarounds is necessary on RV100, RS100 and RS200 chips
  1392. * or the chip could hang on a subsequent access
  1393. */
  1394. if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
  1395. udelay(5000);
  1396. }
  1397. /* This function is required to workaround a hardware bug in some (all?)
  1398. * revisions of the R300. This workaround should be called after every
  1399. * CLOCK_CNTL_INDEX register access. If not, register reads afterward
  1400. * may not be correct.
  1401. */
  1402. if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
  1403. uint32_t save, tmp;
  1404. save = RREG32(RADEON_CLOCK_CNTL_INDEX);
  1405. tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
  1406. WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
  1407. tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
  1408. WREG32(RADEON_CLOCK_CNTL_INDEX, save);
  1409. }
  1410. }
  1411. uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
  1412. {
  1413. uint32_t data;
  1414. WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
  1415. r100_pll_errata_after_index(rdev);
  1416. data = RREG32(RADEON_CLOCK_CNTL_DATA);
  1417. r100_pll_errata_after_data(rdev);
  1418. return data;
  1419. }
  1420. void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
  1421. {
  1422. WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
  1423. r100_pll_errata_after_index(rdev);
  1424. WREG32(RADEON_CLOCK_CNTL_DATA, v);
  1425. r100_pll_errata_after_data(rdev);
  1426. }
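
/*
 * Registers below 0x10000 sit directly inside the MMIO BAR mapping;
 * anything above that window has to go through the MM_INDEX/MM_DATA
 * indirection pair instead.
 */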
uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
{
	if (reg < 0x10000)
		return readl(((void __iomem *)rdev->rmmio) + reg);
	else {
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
	}
}

void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
{
	if (reg < 0x10000)
		writel(v, ((void __iomem *)rdev->rmmio) + reg);
	else {
		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
	}
}

int r100_init(struct radeon_device *rdev)
{
	return 0;
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)
static int r100_debugfs_rbbm_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t reg, value;
	unsigned i;

	seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
	seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
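	/* Dump the 64-entry RBBM command fifo.  Reading an entry with bit 8
	 * of the address set appears to return the register offset (hence
	 * the "- 1) >> 2" decode), while reading it without that bit returns
	 * the data word that was written.
	 */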
	for (i = 0; i < 64; i++) {
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
		reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
		value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
		seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
	}
	return 0;
}

static int r100_debugfs_cp_ring_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t rdp, wdp;
	unsigned count, i, j;

	radeon_ring_free_size(rdev);
	rdp = RREG32(RADEON_CP_RB_RPTR);
	wdp = RREG32(RADEON_CP_RB_WPTR);
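	/* count is the number of dwords from the write pointer around the
	 * ring back to the read pointer; the loop below dumps that many
	 * entries starting at the read pointer.
	 */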
	count = (rdp + rdev->cp.ring_size - wdp) & rdev->cp.ptr_mask;
	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
	seq_printf(m, "%u free dwords in ring\n", rdev->cp.ring_free_dw);
	seq_printf(m, "%u dwords in ring\n", count);
	for (j = 0; j <= count; j++) {
		i = (rdp + j) & rdev->cp.ptr_mask;
		seq_printf(m, "r[%04d]=0x%08x\n", i, rdev->cp.ring[i]);
	}
	return 0;
}

static int r100_debugfs_cp_csq_fifo(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t csq_stat, csq2_stat, tmp;
	unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
	unsigned i;

	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
	seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
	csq_stat = RREG32(RADEON_CP_CSQ_STAT);
	csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
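	/* The pointers are packed into 10-bit fields: CSQ_STAT carries the
	 * primary ring rptr/wptr and the indirect1 rptr, CSQ2_STAT the
	 * indirect1 wptr and both indirect2 pointers.
	 */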
	r_rptr = (csq_stat >> 0) & 0x3ff;
	r_wptr = (csq_stat >> 10) & 0x3ff;
	ib1_rptr = (csq_stat >> 20) & 0x3ff;
	ib1_wptr = (csq2_stat >> 0) & 0x3ff;
	ib2_rptr = (csq2_stat >> 10) & 0x3ff;
	ib2_wptr = (csq2_stat >> 20) & 0x3ff;
	seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
	seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
	seq_printf(m, "Ring rptr %u\n", r_rptr);
	seq_printf(m, "Ring wptr %u\n", r_wptr);
	seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
	seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
	seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
	seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
	/* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
	 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8
	 */
	seq_printf(m, "Ring fifo:\n");
	for (i = 0; i < 256; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect1 fifo:\n");
	for (i = 256; i <= 512; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
	}
	seq_printf(m, "Indirect2 fifo:\n");
	for (i = 640; i < ib1_wptr; i++) {
		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
		tmp = RREG32(RADEON_CP_CSQ_DATA);
		seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
	}
	return 0;
}

static int r100_debugfs_mc_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	uint32_t tmp;

	tmp = RREG32(RADEON_CONFIG_MEMSIZE);
	seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_FB_LOCATION);
	seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_BUS_CNTL);
	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_MC_AGP_LOCATION);
	seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AGP_BASE);
	seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
	tmp = RREG32(RADEON_HOST_PATH_CNTL);
	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
	tmp = RREG32(0x01D0);
	seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_LO_ADDR);
	seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
	tmp = RREG32(RADEON_AIC_HI_ADDR);
	seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
	tmp = RREG32(0x01E4);
	seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
	return 0;
}

static struct drm_info_list r100_debugfs_rbbm_list[] = {
	{"r100_rbbm_info", r100_debugfs_rbbm_info, 0, NULL},
};

static struct drm_info_list r100_debugfs_cp_list[] = {
	{"r100_cp_ring_info", r100_debugfs_cp_ring_info, 0, NULL},
	{"r100_cp_csq_fifo", r100_debugfs_cp_csq_fifo, 0, NULL},
};

static struct drm_info_list r100_debugfs_mc_info_list[] = {
	{"r100_mc_info", r100_debugfs_mc_info, 0, NULL},
};
#endif

int r100_debugfs_rbbm_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_rbbm_list, 1);
#else
	return 0;
#endif
}

int r100_debugfs_cp_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_cp_list, 2);
#else
	return 0;
#endif
}

int r100_debugfs_mc_info_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, r100_debugfs_mc_info_list, 1);
#else
	return 0;
#endif
}
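
/*
 * Surface registers: each surface is a bank of INFO/LOWER_BOUND/UPPER_BOUND
 * registers placed 16 bytes after the previous bank (hence
 * surf_index = reg * 16 below).  A surface applies the given tiling mode and
 * pitch to every access in the range [offset, offset + obj_size - 1].
 */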
int r100_set_surface_reg(struct radeon_device *rdev, int reg,
			 uint32_t tiling_flags, uint32_t pitch,
			 uint32_t offset, uint32_t obj_size)
{
	int surf_index = reg * 16;
	int flags = 0;

	/* r100/r200 divide by 16 */
	if (rdev->family < CHIP_R300)
		flags = pitch / 16;
	else
		flags = pitch / 8;

	if (rdev->family <= CHIP_RS200) {
		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
				 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
			flags |= RADEON_SURF_TILE_COLOR_BOTH;
		if (tiling_flags & RADEON_TILING_MACRO)
			flags |= RADEON_SURF_TILE_COLOR_MACRO;
	} else if (rdev->family <= CHIP_RV280) {
		if (tiling_flags & (RADEON_TILING_MACRO))
			flags |= R200_SURF_TILE_COLOR_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			flags |= R200_SURF_TILE_COLOR_MICRO;
	} else {
		if (tiling_flags & RADEON_TILING_MACRO)
			flags |= R300_SURF_TILE_MACRO;
		if (tiling_flags & RADEON_TILING_MICRO)
			flags |= R300_SURF_TILE_MICRO;
	}

	DRM_DEBUG("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
	WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
	WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
	WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
	return 0;
}

void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
{
	int surf_index = reg * 16;
	WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
}
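
/*
 * r100_bandwidth_update() checks that memory bandwidth covers the demand of
 * the enabled CRTCs and then programs the display watermarks.  Roughly:
 *   peak_disp_bw = sum over CRTCs of (pixel clock * bytes per pixel)
 *   mem_bw       = mclk * bus width in bytes * (2 if DDR) * efficiency
 * From the memory timings (tRCD/tRP/tRAS/tCAS) it derives a worst-case
 * latency and turns that into the GRPH_BUFFER_CNTL critical-point and
 * stop-request watermarks for each CRTC.
 */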
void r100_bandwidth_update(struct radeon_device *rdev)
{
	fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
	fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff, crit_point_ff;
	uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
	fixed20_12 memtcas_ff[8] = {
		fixed_init(1),
		fixed_init(2),
		fixed_init(3),
		fixed_init(0),
		fixed_init_half(1),
		fixed_init_half(2),
		fixed_init(0),
	};
	fixed20_12 memtcas_rs480_ff[8] = {
		fixed_init(0),
		fixed_init(1),
		fixed_init(2),
		fixed_init(3),
		fixed_init(0),
		fixed_init_half(1),
		fixed_init_half(2),
		fixed_init_half(3),
	};
	fixed20_12 memtcas2_ff[8] = {
		fixed_init(0),
		fixed_init(1),
		fixed_init(2),
		fixed_init(3),
		fixed_init(4),
		fixed_init(5),
		fixed_init(6),
		fixed_init(7),
	};
	fixed20_12 memtrbs[8] = {
		fixed_init(1),
		fixed_init_half(1),
		fixed_init(2),
		fixed_init_half(2),
		fixed_init(3),
		fixed_init_half(3),
		fixed_init(4),
		fixed_init_half(4)
	};
	fixed20_12 memtrbs_r4xx[8] = {
		fixed_init(4),
		fixed_init(5),
		fixed_init(6),
		fixed_init(7),
		fixed_init(8),
		fixed_init(9),
		fixed_init(10),
		fixed_init(11)
	};
	fixed20_12 min_mem_eff;
	fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
	fixed20_12 cur_latency_mclk, cur_latency_sclk;
	fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate,
		   disp_drain_rate2, read_return_rate;
	fixed20_12 time_disp1_drop_priority;
	int c;
	int cur_size = 16;	/* in octawords */
	int critical_point = 0, critical_point2;
	/* uint32_t read_return_rate, time_disp1_drop_priority; */
	int stop_req, max_stop_req;
	struct drm_display_mode *mode1 = NULL;
	struct drm_display_mode *mode2 = NULL;
	uint32_t pixel_bytes1 = 0;
	uint32_t pixel_bytes2 = 0;

	if (rdev->mode_info.crtcs[0]->base.enabled) {
		mode1 = &rdev->mode_info.crtcs[0]->base.mode;
		pixel_bytes1 = rdev->mode_info.crtcs[0]->base.fb->bits_per_pixel / 8;
	}
	if (rdev->mode_info.crtcs[1]->base.enabled) {
		mode2 = &rdev->mode_info.crtcs[1]->base.mode;
		pixel_bytes2 = rdev->mode_info.crtcs[1]->base.fb->bits_per_pixel / 8;
	}
	min_mem_eff.full = rfixed_const_8(0);	/* 0.8 memory efficiency */
	/* if display priority is forced high, program the MC display request
	 * latency timers for the enabled crtcs (r300+ only)
	 */
	if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
		uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
		mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
		/* check crtc enables */
		if (mode2)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
		if (mode1)
			mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
		WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
	}
	/*
	 * determine if there is enough bandwidth for the current mode
	 */
	mclk_ff.full = rfixed_const(rdev->clock.default_mclk);
	temp_ff.full = rfixed_const(100);
	mclk_ff.full = rfixed_div(mclk_ff, temp_ff);
	sclk_ff.full = rfixed_const(rdev->clock.default_sclk);
	sclk_ff.full = rfixed_div(sclk_ff, temp_ff);

	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
	temp_ff.full = rfixed_const(temp);
	mem_bw.full = rfixed_mul(mclk_ff, temp_ff);

	pix_clk.full = 0;
	pix_clk2.full = 0;
	peak_disp_bw.full = 0;
	if (mode1) {
		temp_ff.full = rfixed_const(1000);
		pix_clk.full = rfixed_const(mode1->clock); /* convert to fixed point */
		pix_clk.full = rfixed_div(pix_clk, temp_ff);
		temp_ff.full = rfixed_const(pixel_bytes1);
		peak_disp_bw.full += rfixed_mul(pix_clk, temp_ff);
	}
	if (mode2) {
		temp_ff.full = rfixed_const(1000);
		pix_clk2.full = rfixed_const(mode2->clock); /* convert to fixed point */
		pix_clk2.full = rfixed_div(pix_clk2, temp_ff);
		temp_ff.full = rfixed_const(pixel_bytes2);
		peak_disp_bw.full += rfixed_mul(pix_clk2, temp_ff);
	}

	mem_bw.full = rfixed_mul(mem_bw, min_mem_eff);
	if (peak_disp_bw.full >= mem_bw.full) {
		DRM_ERROR("You may not have enough display bandwidth for the current mode\n"
			  "If you have a flickering problem, try lowering the resolution, refresh rate, or color depth\n");
	}
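	/* Worked example with hypothetical numbers: a 166MHz DDR mclk on a
	 * 128-bit bus gives mem_bw = 166 * 16 * 2 = 5312 MB/s, derated to
	 * 0.8 * 5312 = 4249.6 MB/s by min_mem_eff; a single 1600x1200 head
	 * with a 162MHz pixel clock at 32bpp drains 162 * 4 = 648 MB/s,
	 * which fits comfortably.
	 */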
	/* Get values from the EXT_MEM_CNTL register...converting its contents. */
	temp = RREG32(RADEON_MEM_TIMING_CNTL);
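	/* tRCD is the RAS-to-CAS delay, tRP the row precharge time and tRAS
	 * the row active time, all in memory clocks; the field layout and
	 * bias differ per family, hence the chain below.
	 */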
	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
		mem_trcd = ((temp >> 2) & 0x3) + 1;
		mem_trp = ((temp & 0x3)) + 1;
		mem_tras = ((temp & 0x70) >> 4) + 1;
	} else if (rdev->family == CHIP_R300 ||
		   rdev->family == CHIP_R350) { /* r300, r350 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 11) & 0xf) + 4;
	} else if (rdev->family == CHIP_RV350 ||
		   rdev->family <= CHIP_RV380) {
		/* rv3x0 */
		mem_trcd = (temp & 0x7) + 3;
		mem_trp = ((temp >> 8) & 0x7) + 3;
		mem_tras = ((temp >> 11) & 0xf) + 6;
	} else if (rdev->family == CHIP_R420 ||
		   rdev->family == CHIP_R423 ||
		   rdev->family == CHIP_RV410) {
		/* r4xx */
		mem_trcd = (temp & 0xf) + 3;
		if (mem_trcd > 15)
			mem_trcd = 15;
		mem_trp = ((temp >> 8) & 0xf) + 3;
		if (mem_trp > 15)
			mem_trp = 15;
		mem_tras = ((temp >> 12) & 0x1f) + 6;
		if (mem_tras > 31)
			mem_tras = 31;
	} else { /* RV200, R200 */
		mem_trcd = (temp & 0x7) + 1;
		mem_trp = ((temp >> 8) & 0x7) + 1;
		mem_tras = ((temp >> 12) & 0xf) + 4;
	}
	/* convert to FF */
	trcd_ff.full = rfixed_const(mem_trcd);
	trp_ff.full = rfixed_const(mem_trp);
	tras_ff.full = rfixed_const(mem_tras);
	/* Get values from the MEM_SDRAM_MODE_REG register...converting its contents. */
	temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
	data = (temp & (7 << 20)) >> 20;
	if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_RS480) /* don't think rs400 */
			tcas_ff = memtcas_rs480_ff[data];
		else
			tcas_ff = memtcas_ff[data];
	} else
		tcas_ff = memtcas2_ff[data];

	if (rdev->family == CHIP_RS400 ||
	    rdev->family == CHIP_RS480) {
		/* extra cas latency stored in bits 23-25, 0-4 clocks */
		data = (temp >> 23) & 0x7;
		if (data < 5)
			tcas_ff.full += rfixed_const(data);
	}
	if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
		/* on the R300, Tcas is included in Trbs */
		temp = RREG32(RADEON_MEM_CNTL);
		data = (R300_MEM_NUM_CHANNELS_MASK & temp);
		if (data == 1) {
			if (R300_MEM_USE_CD_CH_ONLY & temp) {
				temp = RREG32(R300_MC_IND_INDEX);
				temp &= ~R300_MC_IND_ADDR_MASK;
				temp |= R300_MC_READ_CNTL_CD_mcind;
				WREG32(R300_MC_IND_INDEX, temp);
				temp = RREG32(R300_MC_IND_DATA);
				data = (R300_MEM_RBS_POSITION_C_MASK & temp);
			} else {
				temp = RREG32(R300_MC_READ_CNTL_AB);
				data = (R300_MEM_RBS_POSITION_A_MASK & temp);
			}
		} else {
			temp = RREG32(R300_MC_READ_CNTL_AB);
			data = (R300_MEM_RBS_POSITION_A_MASK & temp);
		}
		if (rdev->family == CHIP_RV410 ||
		    rdev->family == CHIP_R420 ||
		    rdev->family == CHIP_R423)
			trbs_ff = memtrbs_r4xx[data];
		else
			trbs_ff = memtrbs[data];
		tcas_ff.full += trbs_ff.full;
	}
	sclk_eff_ff.full = sclk_ff.full;
	if (rdev->flags & RADEON_IS_AGP) {
		fixed20_12 agpmode_ff;
		agpmode_ff.full = rfixed_const(radeon_agpmode);
		temp_ff.full = rfixed_const_666(16);
		sclk_eff_ff.full -= rfixed_mul(agpmode_ff, temp_ff);
	}
	/* TODO PCIE lanes may affect this - agpmode == 16?? */

	if (ASIC_IS_R300(rdev)) {
		sclk_delay_ff.full = rfixed_const(250);
	} else {
		if ((rdev->family == CHIP_RV100) ||
		    rdev->flags & RADEON_IS_IGP) {
			if (rdev->mc.vram_is_ddr)
				sclk_delay_ff.full = rfixed_const(41);
			else
				sclk_delay_ff.full = rfixed_const(33);
		} else {
			if (rdev->mc.vram_width == 128)
				sclk_delay_ff.full = rfixed_const(57);
			else
				sclk_delay_ff.full = rfixed_const(41);
		}
	}
	mc_latency_sclk.full = rfixed_div(sclk_delay_ff, sclk_eff_ff);

	if (rdev->mc.vram_is_ddr) {
		if (rdev->mc.vram_width == 32) {
			k1.full = rfixed_const(40);
			c = 3;
		} else {
			k1.full = rfixed_const(20);
			c = 1;
		}
	} else {
		k1.full = rfixed_const(40);
		c = 3;
	}
	temp_ff.full = rfixed_const(2);
	mc_latency_mclk.full = rfixed_mul(trcd_ff, temp_ff);
	temp_ff.full = rfixed_const(c);
	mc_latency_mclk.full += rfixed_mul(tcas_ff, temp_ff);
	temp_ff.full = rfixed_const(4);
	mc_latency_mclk.full += rfixed_mul(tras_ff, temp_ff);
	mc_latency_mclk.full += rfixed_mul(trp_ff, temp_ff);
	mc_latency_mclk.full += k1.full;
	mc_latency_mclk.full = rfixed_div(mc_latency_mclk, mclk_ff);
	mc_latency_mclk.full += rfixed_div(temp_ff, sclk_eff_ff);
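	/* i.e. mc_latency_mclk =
	 *	(2*tRCD + c*tCAS + 4*(tRAS + tRP) + k1) / mclk + 4 / sclk_eff
	 */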
	/*
	 * HW cursor time assuming worst case of full size colour cursor.
	 */
	temp_ff.full = rfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
	temp_ff.full += trcd_ff.full;
	if (temp_ff.full < tras_ff.full)
		temp_ff.full = tras_ff.full;
	cur_latency_mclk.full = rfixed_div(temp_ff, mclk_ff);

	temp_ff.full = rfixed_const(cur_size);
	cur_latency_sclk.full = rfixed_div(temp_ff, sclk_eff_ff);
	/*
	 * Find the total latency for the display data.
	 */
	disp_latency_overhead.full = rfixed_const(80);
	disp_latency_overhead.full = rfixed_div(disp_latency_overhead, sclk_ff);
	mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
	mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;

	if (mc_latency_mclk.full > mc_latency_sclk.full)
		disp_latency.full = mc_latency_mclk.full;
	else
		disp_latency.full = mc_latency_sclk.full;

	/* setup Max GRPH_STOP_REQ default value */
	if (ASIC_IS_RV100(rdev))
		max_stop_req = 0x5c;
	else
		max_stop_req = 0x7c;
	if (mode1) {
		/* CRTC1
		 * Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
		 * GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
		 */
		stop_req = mode1->hdisplay * pixel_bytes1 / 16;
		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/*
		 * Find the drain rate of the display buffer.
		 */
		temp_ff.full = rfixed_const((16/pixel_bytes1));
		disp_drain_rate.full = rfixed_div(pix_clk, temp_ff);

		/*
		 * Find the critical point of the display buffer.
		 */
		crit_point_ff.full = rfixed_mul(disp_drain_rate, disp_latency);
		crit_point_ff.full += rfixed_const_half(0);
		critical_point = rfixed_trunc(crit_point_ff);

		if (rdev->disp_priority == 2) {
			critical_point = 0;
		}

		/*
		 * The critical point should never be above max_stop_req-4.  Setting
		 * GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
		 */
		if (max_stop_req - critical_point < 4)
			critical_point = 0;

		if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
			/* some R300 cards have problem with this set to 0, when CRTC2 is enabled. */
			critical_point = 0x10;
		}
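		/* Program the watermarks: STOP_REQ is the FIFO fill level at
		 * which the CRTC stops issuing requests, START_REQ the level
		 * at which it resumes, and the critical point the level below
		 * which its memory requests are treated as high priority.
		 */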
		temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
		temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
		temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		temp &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		temp |= RADEON_GRPH_BUFFER_SIZE;
		temp &= ~(RADEON_GRPH_CRITICAL_CNTL |
			  RADEON_GRPH_CRITICAL_AT_SOF |
			  RADEON_GRPH_STOP_CNTL);
		/*
		 * Write the result into the register.
		 */
		WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						 (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

#if 0
		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
			/* attempt to program RS400 disp regs correctly ??? */
			temp = RREG32(RS400_DISP1_REG_CNTL);
			temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
				  RS400_DISP1_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP1_REQ_CNTL1, (temp |
				(critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
				(critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DMIF_MEM_CNTL1);
			temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
				  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DMIF_MEM_CNTL1, (temp |
				(critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
				(critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
		}
#endif

		DRM_DEBUG("GRPH_BUFFER_CNTL from to %x\n",
			  /* (unsigned int)info->SavedReg->grph_buffer_cntl, */
			  (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
	}
	if (mode2) {
		u32 grph2_cntl;
		stop_req = mode2->hdisplay * pixel_bytes2 / 16;
		if (stop_req > max_stop_req)
			stop_req = max_stop_req;

		/*
		 * Find the drain rate of the display buffer.
		 */
		temp_ff.full = rfixed_const((16/pixel_bytes2));
		disp_drain_rate2.full = rfixed_div(pix_clk2, temp_ff);

		grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
		grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
		grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
		grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
		if ((rdev->family == CHIP_R350) &&
		    (stop_req > 0x15)) {
			stop_req -= 0x10;
		}
		grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
		grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
		grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL |
				RADEON_GRPH_CRITICAL_AT_SOF |
				RADEON_GRPH_STOP_CNTL);
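		/* For CRTC2 the worst case also has to cover the time CRTC1
		 * may hold priority: read_return_rate is the lesser of the
		 * engine and memory throughput, and time_disp1_drop_priority
		 * is roughly how long CRTC1's FIFO takes to refill past its
		 * critical point at the surplus rate, after which it drops
		 * its high-priority request.
		 */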
		if ((rdev->family == CHIP_RS100) ||
		    (rdev->family == CHIP_RS200))
			critical_point2 = 0;
		else {
			temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128;
			temp_ff.full = rfixed_const(temp);
			temp_ff.full = rfixed_mul(mclk_ff, temp_ff);
			if (sclk_ff.full < temp_ff.full)
				temp_ff.full = sclk_ff.full;

			read_return_rate.full = temp_ff.full;

			if (mode1) {
				temp_ff.full = read_return_rate.full - disp_drain_rate.full;
				time_disp1_drop_priority.full = rfixed_div(crit_point_ff, temp_ff);
			} else {
				time_disp1_drop_priority.full = 0;
			}
			crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
			crit_point_ff.full = rfixed_mul(crit_point_ff, disp_drain_rate2);
			crit_point_ff.full += rfixed_const_half(0);

			critical_point2 = rfixed_trunc(crit_point_ff);

			if (rdev->disp_priority == 2) {
				critical_point2 = 0;
			}

			if (max_stop_req - critical_point2 < 4)
				critical_point2 = 0;

		}

		if (critical_point2 == 0 && rdev->family == CHIP_R300) {
			/* some R300 cards have problem with this set to 0 */
			critical_point2 = 0x10;
		}

		WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
						  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));

		if ((rdev->family == CHIP_RS400) ||
		    (rdev->family == CHIP_RS480)) {
#if 0
			/* attempt to program RS400 disp2 regs correctly ??? */
			temp = RREG32(RS400_DISP2_REQ_CNTL1);
			temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
				  RS400_DISP2_STOP_REQ_LEVEL_MASK);
			WREG32(RS400_DISP2_REQ_CNTL1, (temp |
				(critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
				(critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
			temp = RREG32(RS400_DISP2_REQ_CNTL2);
			temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
				  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
			WREG32(RS400_DISP2_REQ_CNTL2, (temp |
				(critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
				(critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
#endif
			WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
			WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
			WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC);
			WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
		}

		DRM_DEBUG("GRPH2_BUFFER_CNTL from to %x\n",
			  (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
	}
}