/* bte.c - SN2 Block Transfer Engine (BTE) support */
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/bootmem.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/addrs.h>
#include <asm/sn/arch.h>
#include <asm/sn/sn_cpuid.h>
#include <asm/sn/pda.h>
#include <asm/sn/shubio.h>
#include <asm/nodedata.h>
#include <asm/delay.h>
#include <asm/sn/bte.h>

#ifndef L1_CACHE_MASK
#define L1_CACHE_MASK (L1_CACHE_BYTES - 1)
#endif

/* two interfaces on two btes */
#define MAX_INTERFACES_TO_TRY		4
  27. static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface)
  28. {
  29. nodepda_t *tmp_nodepda;
  30. tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid));
  31. return &tmp_nodepda->bte_if[interface];
  32. }
/************************************************************************
 * Block Transfer Engine copy related functions.
 *
 ***********************************************************************/

/*
 * bte_copy(src, dest, len, mode, notification)
 *
 * Use the block transfer engine to move kernel memory from src to dest
 * using the assigned mode.
 *
 * Parameters:
 *   src - physical address of the transfer source.
 *   dest - physical address of the transfer destination.
 *   len - number of bytes to transfer from source to dest.
 *   mode - hardware defined.  See reference information
 *          for IBCT0/1 in the SHUB Programmers Reference
 *   notification - kernel virtual address of the notification cache
 *                  line.  If NULL, the default is used and
 *                  the bte_copy is synchronous.
 *
 * NOTE:  This function requires src, dest, and len to
 * be cacheline aligned.
 */
bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
{
	u64 transfer_size;
	u64 transfer_stat;
	struct bteinfo_s *bte;
	bte_result_t bte_status;
	unsigned long irq_flags;
	unsigned long itc_end = 0;
	struct bteinfo_s *btes_to_try[MAX_INTERFACES_TO_TRY];
	int bte_if_index;
	int bte_pri, bte_sec;

	BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n",
		    src, dest, len, mode, notification));

	/* Zero-length transfers are trivially complete. */
	if (len == 0) {
		return BTE_SUCCESS;
	}

	/* The hardware moves whole cache lines: all three must be aligned. */
	BUG_ON((len & L1_CACHE_MASK) ||
	       (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK));
	/* len must fit in the IBLS length field (BTE_LEN_MASK cache lines). */
	BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT)));

	/* CPU 0 (per node) tries bte0 first, CPU 1 try bte1 first */
	if (cpuid_to_subnode(smp_processor_id()) == 0) {
		bte_pri = 0;
		bte_sec = 1;
	} else {
		bte_pri = 1;
		bte_sec = 0;
	}

	/*
	 * Build the ordered list of BTE interfaces to try.  BTE_USE_DEST
	 * prefers the engines on the destination node; BTE_USE_ANY allows
	 * falling back to the other node's engines.  Unused slots are NULL.
	 */
	if (mode & BTE_USE_DEST) {
		/* try remote then local */
		btes_to_try[0] = bte_if_on_node(NASID_GET(dest), bte_pri);
		btes_to_try[1] = bte_if_on_node(NASID_GET(dest), bte_sec);
		if (mode & BTE_USE_ANY) {
			btes_to_try[2] = bte_if_on_node(get_nasid(), bte_pri);
			btes_to_try[3] = bte_if_on_node(get_nasid(), bte_sec);
		} else {
			btes_to_try[2] = NULL;
			btes_to_try[3] = NULL;
		}
	} else {
		/* try local then remote */
		btes_to_try[0] = bte_if_on_node(get_nasid(), bte_pri);
		btes_to_try[1] = bte_if_on_node(get_nasid(), bte_sec);
		if (mode & BTE_USE_ANY) {
			btes_to_try[2] = bte_if_on_node(NASID_GET(dest), bte_pri);
			btes_to_try[3] = bte_if_on_node(NASID_GET(dest), bte_sec);
		} else {
			btes_to_try[2] = NULL;
			btes_to_try[3] = NULL;
		}
	}

retry_bteop:
	do {
		local_irq_save(irq_flags);

		bte_if_index = 0;

		/* Attempt to lock one of the BTE interfaces. */
		while (bte_if_index < MAX_INTERFACES_TO_TRY) {
			bte = btes_to_try[bte_if_index++];

			if (bte == NULL) {
				continue;
			}

			if (spin_trylock(&bte->spinlock)) {
				if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) ||
				    (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) {
					/* Got the lock but BTE still busy */
					spin_unlock(&bte->spinlock);
				} else {
					/* we got the lock and it's not busy */
					break;
				}
			}
			/* busy or contended; forget it and try the next one */
			bte = NULL;
		}

		if (bte != NULL) {
			break;
		}

		local_irq_restore(irq_flags);

		/* Without BTE_WACQUIRE the caller does not want us to spin. */
		if (!(mode & BTE_WACQUIRE)) {
			return BTEFAIL_NOTAVAIL;
		}
	} while (1);

	if (notification == NULL) {
		/* User does not want to be notified. */
		bte->most_rcnt_na = &bte->notify;
	} else {
		bte->most_rcnt_na = notification;
	}

	/* Calculate the number of cache lines to transfer. */
	transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);

	/* Initialize the notification to a known value. */
	*bte->most_rcnt_na = BTE_WORD_BUSY;

	/* Set the source and destination registers */
	BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src))));
	BTE_SRC_STORE(bte, TO_PHYS(src));
	BTE_PRINTKV(("IBDA = 0x%lx)\n", (TO_PHYS(dest))));
	BTE_DEST_STORE(bte, TO_PHYS(dest));

	/* Set the notification register */
	BTE_PRINTKV(("IBNA = 0x%lx)\n",
		     TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na))));
	BTE_NOTIF_STORE(bte,
			TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na)));

	/* Initiate the transfer */
	BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode)));
	BTE_START_TRANSFER(bte, transfer_size, BTE_VALID_MODE(mode));

	/* Timeout budget: 40,000,000 usec worth of ITC cycles (~40 s). */
	itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec);

	spin_unlock_irqrestore(&bte->spinlock, irq_flags);

	/* Asynchronous transfer: the caller polls the notification word. */
	if (notification != NULL) {
		return BTE_SUCCESS;
	}

	/* Synchronous transfer: spin until the hardware writes the
	 * notification word, retrying the whole operation on timeout. */
	while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) {
		if (ia64_get_itc() > itc_end) {
			BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n",
				    NASID_GET(bte->bte_base_addr), bte->bte_num,
				    BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) );
			bte->bte_error_count++;
			bte->bh_error = IBLS_ERROR;
			bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode));
			*bte->most_rcnt_na = BTE_WORD_AVAILABLE;
			goto retry_bteop;
		}
	}

	BTE_PRINTKV((" Delay Done. IBLS = 0x%lx, most_rcnt_na = 0x%lx\n",
		     BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));

	if (transfer_stat & IBLS_ERROR) {
		/* Strip the error flag; the remainder is the status code. */
		bte_status = transfer_stat & ~IBLS_ERROR;
	} else {
		bte_status = BTE_SUCCESS;
	}
	/* Mark the interface ready for the next user. */
	*bte->most_rcnt_na = BTE_WORD_AVAILABLE;

	BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n",
		    BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));

	return bte_status;
}

EXPORT_SYMBOL(bte_copy);
  189. /*
  190. * bte_unaligned_copy(src, dest, len, mode)
  191. *
  192. * use the block transfer engine to move kernel
  193. * memory from src to dest using the assigned mode.
  194. *
  195. * Paramaters:
  196. * src - physical address of the transfer source.
  197. * dest - physical address of the transfer destination.
  198. * len - number of bytes to transfer from source to dest.
  199. * mode - hardware defined. See reference information
  200. * for IBCT0/1 in the SGI documentation.
  201. *
  202. * NOTE: If the source, dest, and len are all cache line aligned,
  203. * then it would be _FAR_ preferrable to use bte_copy instead.
  204. */
  205. bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
  206. {
  207. int destFirstCacheOffset;
  208. u64 headBteSource;
  209. u64 headBteLen;
  210. u64 headBcopySrcOffset;
  211. u64 headBcopyDest;
  212. u64 headBcopyLen;
  213. u64 footBteSource;
  214. u64 footBteLen;
  215. u64 footBcopyDest;
  216. u64 footBcopyLen;
  217. bte_result_t rv;
  218. char *bteBlock, *bteBlock_unaligned;
  219. if (len == 0) {
  220. return BTE_SUCCESS;
  221. }
  222. /* temporary buffer used during unaligned transfers */
  223. bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES,
  224. GFP_KERNEL | GFP_DMA);
  225. if (bteBlock_unaligned == NULL) {
  226. return BTEFAIL_NOTAVAIL;
  227. }
  228. bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned);
  229. headBcopySrcOffset = src & L1_CACHE_MASK;
  230. destFirstCacheOffset = dest & L1_CACHE_MASK;
  231. /*
  232. * At this point, the transfer is broken into
  233. * (up to) three sections. The first section is
  234. * from the start address to the first physical
  235. * cache line, the second is from the first physical
  236. * cache line to the last complete cache line,
  237. * and the third is from the last cache line to the
  238. * end of the buffer. The first and third sections
  239. * are handled by bte copying into a temporary buffer
  240. * and then bcopy'ing the necessary section into the
  241. * final location. The middle section is handled with
  242. * a standard bte copy.
  243. *
  244. * One nasty exception to the above rule is when the
  245. * source and destination are not symetrically
  246. * mis-aligned. If the source offset from the first
  247. * cache line is different from the destination offset,
  248. * we make the first section be the entire transfer
  249. * and the bcopy the entire block into place.
  250. */
  251. if (headBcopySrcOffset == destFirstCacheOffset) {
  252. /*
  253. * Both the source and destination are the same
  254. * distance from a cache line boundary so we can
  255. * use the bte to transfer the bulk of the
  256. * data.
  257. */
  258. headBteSource = src & ~L1_CACHE_MASK;
  259. headBcopyDest = dest;
  260. if (headBcopySrcOffset) {
  261. headBcopyLen =
  262. (len >
  263. (L1_CACHE_BYTES -
  264. headBcopySrcOffset) ? L1_CACHE_BYTES
  265. - headBcopySrcOffset : len);
  266. headBteLen = L1_CACHE_BYTES;
  267. } else {
  268. headBcopyLen = 0;
  269. headBteLen = 0;
  270. }
  271. if (len > headBcopyLen) {
  272. footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK;
  273. footBteLen = L1_CACHE_BYTES;
  274. footBteSource = src + len - footBcopyLen;
  275. footBcopyDest = dest + len - footBcopyLen;
  276. if (footBcopyDest == (headBcopyDest + headBcopyLen)) {
  277. /*
  278. * We have two contigous bcopy
  279. * blocks. Merge them.
  280. */
  281. headBcopyLen += footBcopyLen;
  282. headBteLen += footBteLen;
  283. } else if (footBcopyLen > 0) {
  284. rv = bte_copy(footBteSource,
  285. ia64_tpa((unsigned long)bteBlock),
  286. footBteLen, mode, NULL);
  287. if (rv != BTE_SUCCESS) {
  288. kfree(bteBlock_unaligned);
  289. return rv;
  290. }
  291. memcpy(__va(footBcopyDest),
  292. (char *)bteBlock, footBcopyLen);
  293. }
  294. } else {
  295. footBcopyLen = 0;
  296. footBteLen = 0;
  297. }
  298. if (len > (headBcopyLen + footBcopyLen)) {
  299. /* now transfer the middle. */
  300. rv = bte_copy((src + headBcopyLen),
  301. (dest +
  302. headBcopyLen),
  303. (len - headBcopyLen -
  304. footBcopyLen), mode, NULL);
  305. if (rv != BTE_SUCCESS) {
  306. kfree(bteBlock_unaligned);
  307. return rv;
  308. }
  309. }
  310. } else {
  311. /*
  312. * The transfer is not symetric, we will
  313. * allocate a buffer large enough for all the
  314. * data, bte_copy into that buffer and then
  315. * bcopy to the destination.
  316. */
  317. /* Add the leader from source */
  318. headBteLen = len + (src & L1_CACHE_MASK);
  319. /* Add the trailing bytes from footer. */
  320. headBteLen += L1_CACHE_BYTES - (headBteLen & L1_CACHE_MASK);
  321. headBteSource = src & ~L1_CACHE_MASK;
  322. headBcopySrcOffset = src & L1_CACHE_MASK;
  323. headBcopyDest = dest;
  324. headBcopyLen = len;
  325. }
  326. if (headBcopyLen > 0) {
  327. rv = bte_copy(headBteSource,
  328. ia64_tpa((unsigned long)bteBlock), headBteLen,
  329. mode, NULL);
  330. if (rv != BTE_SUCCESS) {
  331. kfree(bteBlock_unaligned);
  332. return rv;
  333. }
  334. memcpy(__va(headBcopyDest), ((char *)bteBlock +
  335. headBcopySrcOffset), headBcopyLen);
  336. }
  337. kfree(bteBlock_unaligned);
  338. return BTE_SUCCESS;
  339. }
  340. EXPORT_SYMBOL(bte_unaligned_copy);
/************************************************************************
 * Block Transfer Engine initialization functions.
 *
 ***********************************************************************/

/*
 * bte_init_node(nodepda, cnode)
 *
 * Initialize the nodepda structure with BTE base addresses and
 * spinlocks.
 *
 * Parameters:
 *   mynodepda - per-node data area to populate.
 *   cnode - compact node id of the node being initialized.
 */
void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode)
{
	int i;

	/*
	 * Indicate that all the block transfer engines on this node
	 * are available.
	 */

	/*
	 * Allocate one bte_recover_t structure per node.  It holds
	 * the recovery lock for node.  All the bte interface structures
	 * will point at this one bte_recover structure to get the lock.
	 */
	spin_lock_init(&mynodepda->bte_recovery_lock);
	/* Recovery timer fires bte_error_handler with this nodepda. */
	init_timer(&mynodepda->bte_recovery_timer);
	mynodepda->bte_recovery_timer.function = bte_error_handler;
	mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda;

	for (i = 0; i < BTES_PER_NODE; i++) {
		u64 *base_addr;

		/* Which link status register should we use? */
		base_addr = (u64 *)
		    REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), BTE_BASE_ADDR(i));
		/* Cache the MMIO addresses of this engine's registers. */
		mynodepda->bte_if[i].bte_base_addr = base_addr;
		mynodepda->bte_if[i].bte_source_addr = BTE_SOURCE_ADDR(base_addr);
		mynodepda->bte_if[i].bte_destination_addr = BTE_DEST_ADDR(base_addr);
		mynodepda->bte_if[i].bte_control_addr = BTE_CTRL_ADDR(base_addr);
		mynodepda->bte_if[i].bte_notify_addr = BTE_NOTIF_ADDR(base_addr);

		/*
		 * Initialize the notification and spinlock
		 * so the first transfer can occur.
		 */
		mynodepda->bte_if[i].most_rcnt_na =
		    &(mynodepda->bte_if[i].notify);
		mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE;
		spin_lock_init(&mynodepda->bte_if[i].spinlock);

		/* Bookkeeping fields start out clean. */
		mynodepda->bte_if[i].bte_cnode = cnode;
		mynodepda->bte_if[i].bte_error_count = 0;
		mynodepda->bte_if[i].bte_num = i;
		mynodepda->bte_if[i].cleanup_active = 0;
		mynodepda->bte_if[i].bh_error = 0;
	}
}