bte.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. /*
  2. * This file is subject to the terms and conditions of the GNU General Public
  3. * License. See the file "COPYING" in the main directory of this archive
  4. * for more details.
  5. *
  6. * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
  7. */
  8. #include <linux/config.h>
  9. #include <linux/module.h>
  10. #include <asm/sn/nodepda.h>
  11. #include <asm/sn/addrs.h>
  12. #include <asm/sn/arch.h>
  13. #include <asm/sn/sn_cpuid.h>
  14. #include <asm/sn/pda.h>
  15. #include <asm/sn/shubio.h>
  16. #include <asm/nodedata.h>
  17. #include <asm/delay.h>
  18. #include <linux/bootmem.h>
  19. #include <linux/string.h>
  20. #include <linux/sched.h>
  21. #include <asm/sn/bte.h>
  22. #ifndef L1_CACHE_MASK
  23. #define L1_CACHE_MASK (L1_CACHE_BYTES - 1)
  24. #endif
  25. /* two interfaces on two btes */
  26. #define MAX_INTERFACES_TO_TRY 4
  27. static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface)
  28. {
  29. nodepda_t *tmp_nodepda;
  30. tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid));
  31. return &tmp_nodepda->bte_if[interface];
  32. }
  33. /************************************************************************
  34. * Block Transfer Engine copy related functions.
  35. *
  36. ***********************************************************************/
/*
 * bte_copy(src, dest, len, mode, notification)
 *
 * Use the block transfer engine to move kernel memory from src to dest
 * using the assigned mode.
 *
 * Parameters:
 *   src - physical address of the transfer source.
 *   dest - physical address of the transfer destination.
 *   len - number of bytes to transfer from source to dest.
 *   mode - hardware defined.  See reference information
 *          for IBCT0/1 in the SHUB Programmers Reference
 *   notification - kernel virtual address of the notification cache
 *                  line.  If NULL, the default is used and
 *                  the bte_copy is synchronous.
 *
 * NOTE:  This function requires src, dest, and len to
 * be cacheline aligned.
 */
bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
{
	u64 transfer_size;
	u64 transfer_stat;
	struct bteinfo_s *bte;
	bte_result_t bte_status;
	unsigned long irq_flags;
	unsigned long itc_end = 0;
	struct bteinfo_s *btes_to_try[MAX_INTERFACES_TO_TRY];
	int bte_if_index;
	int bte_pri, bte_sec;

	BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n",
		    src, dest, len, mode, notification));

	/* A zero-length transfer is trivially complete. */
	if (len == 0) {
		return BTE_SUCCESS;
	}

	/*
	 * The hardware moves whole cache lines: src, dest and len must be
	 * L1-cache aligned, and the line count must fit in the IBLS
	 * length field (BTE_LEN_MASK lines).
	 */
	BUG_ON((len & L1_CACHE_MASK) ||
	       (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK));
	BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT)));

	/* CPU 0 (per node) tries bte0 first, CPU 1 try bte1 first */
	if (cpuid_to_subnode(smp_processor_id()) == 0) {
		bte_pri = 0;
		bte_sec = 1;
	} else {
		bte_pri = 1;
		bte_sec = 0;
	}

	/*
	 * Build the ordered candidate list of BTE interfaces.  BTE_USE_DEST
	 * prefers the interfaces on the destination's node; otherwise the
	 * local node's interfaces come first.  BTE_USE_ANY adds the other
	 * node's pair as a fallback; unused slots are NULL.
	 */
	if (mode & BTE_USE_DEST) {
		/* try remote then local */
		btes_to_try[0] = bte_if_on_node(NASID_GET(dest), bte_pri);
		btes_to_try[1] = bte_if_on_node(NASID_GET(dest), bte_sec);
		if (mode & BTE_USE_ANY) {
			btes_to_try[2] = bte_if_on_node(get_nasid(), bte_pri);
			btes_to_try[3] = bte_if_on_node(get_nasid(), bte_sec);
		} else {
			btes_to_try[2] = NULL;
			btes_to_try[3] = NULL;
		}
	} else {
		/* try local then remote */
		btes_to_try[0] = bte_if_on_node(get_nasid(), bte_pri);
		btes_to_try[1] = bte_if_on_node(get_nasid(), bte_sec);
		if (mode & BTE_USE_ANY) {
			btes_to_try[2] = bte_if_on_node(NASID_GET(dest), bte_pri);
			btes_to_try[3] = bte_if_on_node(NASID_GET(dest), bte_sec);
		} else {
			btes_to_try[2] = NULL;
			btes_to_try[3] = NULL;
		}
	}

retry_bteop:
	do {
		local_irq_save(irq_flags);

		bte_if_index = 0;

		/* Attempt to lock one of the BTE interfaces. */
		while (bte_if_index < MAX_INTERFACES_TO_TRY) {
			bte = btes_to_try[bte_if_index++];

			if (bte == NULL) {
				continue;
			}

			if (spin_trylock(&bte->spinlock)) {
				/*
				 * Even with the lock held the engine may
				 * still be finishing a previous transfer:
				 * check both the notification word and the
				 * hardware busy bit.
				 */
				if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) ||
				    (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) {
					/* Got the lock but BTE still busy */
					spin_unlock(&bte->spinlock);
				} else {
					/* we got the lock and it's not busy */
					break;
				}
			}
			bte = NULL;
		}

		if (bte != NULL) {
			break;
		}

		local_irq_restore(irq_flags);

		/* Without BTE_WACQUIRE the caller does not want us to spin. */
		if (!(mode & BTE_WACQUIRE)) {
			return BTEFAIL_NOTAVAIL;
		}
	} while (1);

	if (notification == NULL) {
		/* User does not want to be notified. */
		bte->most_rcnt_na = &bte->notify;
	} else {
		bte->most_rcnt_na = notification;
	}

	/* Calculate the number of cache lines to transfer. */
	transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);

	/* Initialize the notification to a known value. */
	*bte->most_rcnt_na = BTE_WORD_BUSY;

	/*
	 * Program the engine.  NOTE(review): the store order below
	 * (IBLS, IBSA, IBDA, IBNA, then IBCT last to kick off the
	 * transfer) appears to be hardware-mandated — do not reorder.
	 */
	/* Set the status reg busy bit and transfer length */
	BTE_PRINTKV(("IBLS = 0x%lx\n", IBLS_BUSY | transfer_size));
	BTE_LNSTAT_STORE(bte, IBLS_BUSY | transfer_size);

	/* Set the source and destination registers */
	BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src))));
	BTE_SRC_STORE(bte, TO_PHYS(src));
	BTE_PRINTKV(("IBDA = 0x%lx)\n", (TO_PHYS(dest))));
	BTE_DEST_STORE(bte, TO_PHYS(dest));

	/* Set the notification register */
	BTE_PRINTKV(("IBNA = 0x%lx)\n",
		     TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na))));
	BTE_NOTIF_STORE(bte,
			TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na)));

	/* Initiate the transfer */
	BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode)));
	BTE_CTRL_STORE(bte, BTE_VALID_MODE(mode));

	/* Deadline for the synchronous wait: 40e6 usec (~40 s) of ITC. */
	itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec);

	spin_unlock_irqrestore(&bte->spinlock, irq_flags);

	/* Asynchronous caller: the transfer is in flight, return now. */
	if (notification != NULL) {
		return BTE_SUCCESS;
	}

	/*
	 * Synchronous caller: spin until the hardware writes the
	 * notification word, or the deadline passes.  On timeout, run
	 * BTE error recovery and restart the whole acquisition.
	 */
	while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) {
		if (ia64_get_itc() > itc_end) {
			BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n",
				NASID_GET(bte->bte_base_addr), bte->bte_num,
				BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) );
			bte->bte_error_count++;
			bte->bh_error = IBLS_ERROR;
			bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode));
			*bte->most_rcnt_na = BTE_WORD_AVAILABLE;
			goto retry_bteop;
		}
	}

	BTE_PRINTKV((" Delay Done.  IBLS = 0x%lx, most_rcnt_na = 0x%lx\n",
		     BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));

	/* On error the notification word carries the error status bits. */
	if (transfer_stat & IBLS_ERROR) {
		bte_status = transfer_stat & ~IBLS_ERROR;
	} else {
		bte_status = BTE_SUCCESS;
	}
	/* Mark the interface free for the next user. */
	*bte->most_rcnt_na = BTE_WORD_AVAILABLE;

	BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n",
		    BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));

	return bte_status;
}

EXPORT_SYMBOL(bte_copy);
/*
 * bte_unaligned_copy(src, dest, len, mode)
 *
 * use the block transfer engine to move kernel
 * memory from src to dest using the assigned mode.
 *
 * Parameters:
 *   src - physical address of the transfer source.
 *   dest - physical address of the transfer destination.
 *   len - number of bytes to transfer from source to dest.
 *   mode - hardware defined.  See reference information
 *          for IBCT0/1 in the SGI documentation.
 *
 * NOTE: If the source, dest, and len are all cache line aligned,
 * then it would be _FAR_ preferable to use bte_copy instead.
 */
bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
{
	int destFirstCacheOffset;
	u64 headBteSource;
	u64 headBteLen;
	u64 headBcopySrcOffset;
	u64 headBcopyDest;
	u64 headBcopyLen;
	u64 footBteSource;
	u64 footBteLen;
	u64 footBcopyDest;
	u64 footBcopyLen;
	bte_result_t rv;
	char *bteBlock, *bteBlock_unaligned;

	if (len == 0) {
		return BTE_SUCCESS;
	}

	/* temporary buffer used during unaligned transfers */
	/*
	 * Over-allocate by 3 cache lines so the buffer can be aligned up
	 * and still hold a full head and foot line beyond len.
	 * NOTE(review): GFP_KERNEL may sleep — presumably this is only
	 * called from process context; confirm against callers.
	 */
	bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES,
				     GFP_KERNEL | GFP_DMA);
	if (bteBlock_unaligned == NULL) {
		return BTEFAIL_NOTAVAIL;
	}
	/* bteBlock is the cache-aligned view; bteBlock_unaligned is freed. */
	bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned);

	headBcopySrcOffset = src & L1_CACHE_MASK;
	destFirstCacheOffset = dest & L1_CACHE_MASK;

	/*
	 * At this point, the transfer is broken into
	 * (up to) three sections.  The first section is
	 * from the start address to the first physical
	 * cache line, the second is from the first physical
	 * cache line to the last complete cache line,
	 * and the third is from the last cache line to the
	 * end of the buffer.  The first and third sections
	 * are handled by bte copying into a temporary buffer
	 * and then bcopy'ing the necessary section into the
	 * final location.  The middle section is handled with
	 * a standard bte copy.
	 *
	 * One nasty exception to the above rule is when the
	 * source and destination are not symmetrically
	 * mis-aligned.  If the source offset from the first
	 * cache line is different from the destination offset,
	 * we make the first section be the entire transfer
	 * and the bcopy the entire block into place.
	 */
	if (headBcopySrcOffset == destFirstCacheOffset) {

		/*
		 * Both the source and destination are the same
		 * distance from a cache line boundary so we can
		 * use the bte to transfer the bulk of the
		 * data.
		 */
		headBteSource = src & ~L1_CACHE_MASK;
		headBcopyDest = dest;
		if (headBcopySrcOffset) {
			/*
			 * Head section: the partial line before the first
			 * aligned boundary (or all of len if it ends
			 * within that first line).
			 */
			headBcopyLen =
			    (len >
			     (L1_CACHE_BYTES -
			      headBcopySrcOffset) ? L1_CACHE_BYTES
			     - headBcopySrcOffset : len);
			headBteLen = L1_CACHE_BYTES;
		} else {
			headBcopyLen = 0;
			headBteLen = 0;
		}

		if (len > headBcopyLen) {
			/* Foot section: the partial line at the tail end. */
			footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK;
			footBteLen = L1_CACHE_BYTES;

			footBteSource = src + len - footBcopyLen;
			footBcopyDest = dest + len - footBcopyLen;

			if (footBcopyDest == (headBcopyDest + headBcopyLen)) {
				/*
				 * We have two contiguous bcopy
				 * blocks.  Merge them.
				 */
				headBcopyLen += footBcopyLen;
				headBteLen += footBteLen;
			} else if (footBcopyLen > 0) {
				/* BTE the tail line into the bounce buffer,
				 * then copy just the needed bytes out. */
				rv = bte_copy(footBteSource,
					      ia64_tpa((unsigned long)bteBlock),
					      footBteLen, mode, NULL);
				if (rv != BTE_SUCCESS) {
					kfree(bteBlock_unaligned);
					return rv;
				}

				memcpy(__va(footBcopyDest),
				       (char *)bteBlock, footBcopyLen);
			}
		} else {
			footBcopyLen = 0;
			footBteLen = 0;
		}

		if (len > (headBcopyLen + footBcopyLen)) {
			/* now transfer the middle. */
			rv = bte_copy((src + headBcopyLen),
				      (dest +
				       headBcopyLen),
				      (len - headBcopyLen -
				       footBcopyLen), mode, NULL);
			if (rv != BTE_SUCCESS) {
				kfree(bteBlock_unaligned);
				return rv;
			}

		}
	} else {

		/*
		 * The transfer is not symmetric, we will
		 * allocate a buffer large enough for all the
		 * data, bte_copy into that buffer and then
		 * bcopy to the destination.
		 */

		/* Add the leader from source */
		headBteLen = len + (src & L1_CACHE_MASK);
		/* Add the trailing bytes from footer. */
		headBteLen += L1_CACHE_BYTES - (headBteLen & L1_CACHE_MASK);
		headBteSource = src & ~L1_CACHE_MASK;
		headBcopySrcOffset = src & L1_CACHE_MASK;
		headBcopyDest = dest;
		headBcopyLen = len;
	}

	if (headBcopyLen > 0) {
		/* BTE the (aligned, rounded-up) head region into the bounce
		 * buffer, then copy the wanted bytes to the destination. */
		rv = bte_copy(headBteSource,
			      ia64_tpa((unsigned long)bteBlock), headBteLen,
			      mode, NULL);
		if (rv != BTE_SUCCESS) {
			kfree(bteBlock_unaligned);
			return rv;
		}

		memcpy(__va(headBcopyDest), ((char *)bteBlock +
					     headBcopySrcOffset), headBcopyLen);
	}
	kfree(bteBlock_unaligned);
	return BTE_SUCCESS;
}

EXPORT_SYMBOL(bte_unaligned_copy);
  344. /************************************************************************
  345. * Block Transfer Engine initialization functions.
  346. *
  347. ***********************************************************************/
  348. /*
  349. * bte_init_node(nodepda, cnode)
  350. *
  351. * Initialize the nodepda structure with BTE base addresses and
  352. * spinlocks.
  353. */
  354. void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode)
  355. {
  356. int i;
  357. /*
  358. * Indicate that all the block transfer engines on this node
  359. * are available.
  360. */
  361. /*
  362. * Allocate one bte_recover_t structure per node. It holds
  363. * the recovery lock for node. All the bte interface structures
  364. * will point at this one bte_recover structure to get the lock.
  365. */
  366. spin_lock_init(&mynodepda->bte_recovery_lock);
  367. init_timer(&mynodepda->bte_recovery_timer);
  368. mynodepda->bte_recovery_timer.function = bte_error_handler;
  369. mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda;
  370. for (i = 0; i < BTES_PER_NODE; i++) {
  371. /* Which link status register should we use? */
  372. unsigned long link_status = (i == 0 ? IIO_IBLS0 : IIO_IBLS1);
  373. mynodepda->bte_if[i].bte_base_addr = (u64 *)
  374. REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), link_status);
  375. /*
  376. * Initialize the notification and spinlock
  377. * so the first transfer can occur.
  378. */
  379. mynodepda->bte_if[i].most_rcnt_na =
  380. &(mynodepda->bte_if[i].notify);
  381. mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE;
  382. spin_lock_init(&mynodepda->bte_if[i].spinlock);
  383. mynodepda->bte_if[i].bte_cnode = cnode;
  384. mynodepda->bte_if[i].bte_error_count = 0;
  385. mynodepda->bte_if[i].bte_num = i;
  386. mynodepda->bte_if[i].cleanup_active = 0;
  387. mynodepda->bte_if[i].bh_error = 0;
  388. }
  389. }