/*
 * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
 * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 2000 SiByte, Inc.
 * Copyright (C) 2005 Thiemo Seufer
 *
 * Written by Justin Carlson of SiByte, Inc.
 *         and Kip Walker of Broadcom Corp.
 *
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
  25. #include <linux/module.h>
  26. #include <linux/sched.h>
  27. #include <linux/smp.h>
  28. #include <asm/io.h>
  29. #include <asm/sibyte/sb1250.h>
  30. #include <asm/sibyte/sb1250_regs.h>
  31. #include <asm/sibyte/sb1250_dma.h>
  32. #ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
  33. #define SB1_PREF_LOAD_STREAMED_HINT "0"
  34. #define SB1_PREF_STORE_STREAMED_HINT "1"
  35. #else
  36. #define SB1_PREF_LOAD_STREAMED_HINT "4"
  37. #define SB1_PREF_STORE_STREAMED_HINT "5"
  38. #endif
  39. static inline void clear_page_cpu(void *page)
  40. {
  41. unsigned char *addr = (unsigned char *) page;
  42. unsigned char *end = addr + PAGE_SIZE;
  43. /*
  44. * JDCXXX - This should be bottlenecked by the write buffer, but these
  45. * things tend to be mildly unpredictable...should check this on the
  46. * performance model
  47. *
  48. * We prefetch 4 lines ahead. We're also "cheating" slightly here...
  49. * since we know we're on an SB1, we force the assembler to take
  50. * 64-bit operands to speed things up
  51. */
  52. __asm__ __volatile__(
  53. " .set push \n"
  54. " .set mips4 \n"
  55. " .set noreorder \n"
  56. #ifdef CONFIG_CPU_HAS_PREFETCH
  57. " daddiu %0, %0, 128 \n"
  58. " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%0) \n"
  59. /* Prefetch the first 4 lines */
  60. " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%0) \n"
  61. " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%0) \n"
  62. " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n"
  63. "1: sd $0, -128(%0) \n" /* Throw out a cacheline of 0's */
  64. " sd $0, -120(%0) \n"
  65. " sd $0, -112(%0) \n"
  66. " sd $0, -104(%0) \n"
  67. " daddiu %0, %0, 32 \n"
  68. " bnel %0, %1, 1b \n"
  69. " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n"
  70. " daddiu %0, %0, -128 \n"
  71. #endif
  72. " sd $0, 0(%0) \n" /* Throw out a cacheline of 0's */
  73. "1: sd $0, 8(%0) \n"
  74. " sd $0, 16(%0) \n"
  75. " sd $0, 24(%0) \n"
  76. " daddiu %0, %0, 32 \n"
  77. " bnel %0, %1, 1b \n"
  78. " sd $0, 0(%0) \n"
  79. " .set pop \n"
  80. : "+r" (addr)
  81. : "r" (end)
  82. : "memory");
  83. }
  84. static inline void copy_page_cpu(void *to, void *from)
  85. {
  86. unsigned char *src = (unsigned char *)from;
  87. unsigned char *dst = (unsigned char *)to;
  88. unsigned char *end = src + PAGE_SIZE;
  89. /*
  90. * The pref's used here are using "streaming" hints, which cause the
  91. * copied data to be kicked out of the cache sooner. A page copy often
  92. * ends up copying a lot more data than is commonly used, so this seems
  93. * to make sense in terms of reducing cache pollution, but I've no real
  94. * performance data to back this up
  95. */
  96. __asm__ __volatile__(
  97. " .set push \n"
  98. " .set mips4 \n"
  99. " .set noreorder \n"
  100. #ifdef CONFIG_CPU_HAS_PREFETCH
  101. " daddiu %0, %0, 128 \n"
  102. " daddiu %1, %1, 128 \n"
  103. " pref " SB1_PREF_LOAD_STREAMED_HINT ", -128(%0)\n"
  104. /* Prefetch the first 4 lines */
  105. " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n"
  106. " pref " SB1_PREF_LOAD_STREAMED_HINT ", -96(%0)\n"
  107. " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%1)\n"
  108. " pref " SB1_PREF_LOAD_STREAMED_HINT ", -64(%0)\n"
  109. " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%1)\n"
  110. " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n"
  111. "1: pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%1)\n"
  112. # ifdef CONFIG_64BIT
  113. " ld $8, -128(%0) \n" /* Block copy a cacheline */
  114. " ld $9, -120(%0) \n"
  115. " ld $10, -112(%0) \n"
  116. " ld $11, -104(%0) \n"
  117. " sd $8, -128(%1) \n"
  118. " sd $9, -120(%1) \n"
  119. " sd $10, -112(%1) \n"
  120. " sd $11, -104(%1) \n"
  121. # else
  122. " lw $2, -128(%0) \n" /* Block copy a cacheline */
  123. " lw $3, -124(%0) \n"
  124. " lw $6, -120(%0) \n"
  125. " lw $7, -116(%0) \n"
  126. " lw $8, -112(%0) \n"
  127. " lw $9, -108(%0) \n"
  128. " lw $10, -104(%0) \n"
  129. " lw $11, -100(%0) \n"
  130. " sw $2, -128(%1) \n"
  131. " sw $3, -124(%1) \n"
  132. " sw $6, -120(%1) \n"
  133. " sw $7, -116(%1) \n"
  134. " sw $8, -112(%1) \n"
  135. " sw $9, -108(%1) \n"
  136. " sw $10, -104(%1) \n"
  137. " sw $11, -100(%1) \n"
  138. # endif
  139. " daddiu %0, %0, 32 \n"
  140. " daddiu %1, %1, 32 \n"
  141. " bnel %0, %2, 1b \n"
  142. " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n"
  143. " daddiu %0, %0, -128 \n"
  144. " daddiu %1, %1, -128 \n"
  145. #endif
  146. #ifdef CONFIG_64BIT
  147. " ld $8, 0(%0) \n" /* Block copy a cacheline */
  148. "1: ld $9, 8(%0) \n"
  149. " ld $10, 16(%0) \n"
  150. " ld $11, 24(%0) \n"
  151. " sd $8, 0(%1) \n"
  152. " sd $9, 8(%1) \n"
  153. " sd $10, 16(%1) \n"
  154. " sd $11, 24(%1) \n"
  155. #else
  156. " lw $2, 0(%0) \n" /* Block copy a cacheline */
  157. "1: lw $3, 4(%0) \n"
  158. " lw $6, 8(%0) \n"
  159. " lw $7, 12(%0) \n"
  160. " lw $8, 16(%0) \n"
  161. " lw $9, 20(%0) \n"
  162. " lw $10, 24(%0) \n"
  163. " lw $11, 28(%0) \n"
  164. " sw $2, 0(%1) \n"
  165. " sw $3, 4(%1) \n"
  166. " sw $6, 8(%1) \n"
  167. " sw $7, 12(%1) \n"
  168. " sw $8, 16(%1) \n"
  169. " sw $9, 20(%1) \n"
  170. " sw $10, 24(%1) \n"
  171. " sw $11, 28(%1) \n"
  172. #endif
  173. " daddiu %0, %0, 32 \n"
  174. " daddiu %1, %1, 32 \n"
  175. " bnel %0, %2, 1b \n"
  176. #ifdef CONFIG_64BIT
  177. " ld $8, 0(%0) \n"
  178. #else
  179. " lw $2, 0(%0) \n"
  180. #endif
  181. " .set pop \n"
  182. : "+r" (src), "+r" (dst)
  183. : "r" (end)
  184. #ifdef CONFIG_64BIT
  185. : "$8","$9","$10","$11","memory");
  186. #else
  187. : "$2","$3","$6","$7","$8","$9","$10","$11","memory");
  188. #endif
  189. }
  190. #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
  191. /*
  192. * Pad descriptors to cacheline, since each is exclusively owned by a
  193. * particular CPU.
  194. */
  195. typedef struct dmadscr_s {
  196. u64 dscr_a;
  197. u64 dscr_b;
  198. u64 pad_a;
  199. u64 pad_b;
  200. } dmadscr_t;
  201. static dmadscr_t page_descr[DM_NUM_CHANNELS]
  202. __attribute__((aligned(SMP_CACHE_BYTES)));
  203. void sb1_dma_init(void)
  204. {
  205. int i;
  206. for (i = 0; i < DM_NUM_CHANNELS; i++) {
  207. const u64 base_val = CPHYSADDR(&page_descr[i]) |
  208. V_DM_DSCR_BASE_RINGSZ(1);
  209. volatile void *base_reg =
  210. IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));
  211. __raw_writeq(base_val, base_reg);
  212. __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
  213. __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
  214. }
  215. }
  216. void clear_page(void *page)
  217. {
  218. u64 to_phys = CPHYSADDR(page);
  219. unsigned int cpu = smp_processor_id();
  220. /* if the page is not in KSEG0, use old way */
  221. if ((long)KSEGX(page) != (long)CKSEG0)
  222. return clear_page_cpu(page);
  223. page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
  224. M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
  225. page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
  226. __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
  227. /*
  228. * Don't really want to do it this way, but there's no
  229. * reliable way to delay completion detection.
  230. */
  231. while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
  232. & M_DM_DSCR_BASE_INTERRUPT))
  233. ;
  234. __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
  235. }
  236. void copy_page(void *to, void *from)
  237. {
  238. u64 from_phys = CPHYSADDR(from);
  239. u64 to_phys = CPHYSADDR(to);
  240. unsigned int cpu = smp_processor_id();
  241. /* if any page is not in KSEG0, use old way */
  242. if ((long)KSEGX(to) != (long)CKSEG0
  243. || (long)KSEGX(from) != (long)CKSEG0)
  244. return copy_page_cpu(to, from);
  245. page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
  246. M_DM_DSCRA_INTERRUPT;
  247. page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
  248. __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
  249. /*
  250. * Don't really want to do it this way, but there's no
  251. * reliable way to delay completion detection.
  252. */
  253. while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
  254. & M_DM_DSCR_BASE_INTERRUPT))
  255. ;
  256. __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
  257. }
  258. #else /* !CONFIG_SIBYTE_DMA_PAGEOPS */
  259. void clear_page(void *page)
  260. {
  261. return clear_page_cpu(page);
  262. }
  263. void copy_page(void *to, void *from)
  264. {
  265. return copy_page_cpu(to, from);
  266. }
  267. #endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */
  268. EXPORT_SYMBOL(clear_page);
  269. EXPORT_SYMBOL(copy_page);