pg-sb1.c 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. /*
  2. * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
  3. * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
  4. * Copyright (C) 2000 SiByte, Inc.
  5. * Copyright (C) 2005 Thiemo Seufer
  6. *
  7. * Written by Justin Carlson of SiByte, Inc.
  8. * and Kip Walker of Broadcom Corp.
  9. *
  10. *
  11. * This program is free software; you can redistribute it and/or
  12. * modify it under the terms of the GNU General Public License
  13. * as published by the Free Software Foundation; either version 2
  14. * of the License, or (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with this program; if not, write to the Free Software
  23. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  24. */
  25. #include <linux/config.h>
  26. #include <linux/module.h>
  27. #include <linux/sched.h>
  28. #include <linux/smp.h>
  29. #include <asm/io.h>
  30. #include <asm/sibyte/sb1250.h>
  31. #include <asm/sibyte/sb1250_regs.h>
  32. #include <asm/sibyte/sb1250_dma.h>
/*
 * Hint operands for the "pref" instructions used by clear_page_cpu() and
 * copy_page_cpu().  The values are string literals because they are pasted
 * directly into the inline assembly templates.
 *
 * NOTE(review): in the MIPS ISA hints 0/1 are the plain load/store
 * prefetches and 4/5 are their "streamed" counterparts, so builds with
 * CONFIG_SB1_PASS_1_WORKAROUNDS presumably avoid the streamed forms --
 * confirm against the SB1 pass-1 errata.
 */
#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
#define SB1_PREF_LOAD_STREAMED_HINT "0"
#define SB1_PREF_STORE_STREAMED_HINT "1"
#else
#define SB1_PREF_LOAD_STREAMED_HINT "4"
#define SB1_PREF_STORE_STREAMED_HINT "5"
#endif
  40. static inline void clear_page_cpu(void *page)
  41. {
  42. unsigned char *addr = (unsigned char *) page;
  43. unsigned char *end = addr + PAGE_SIZE;
  44. /*
  45. * JDCXXX - This should be bottlenecked by the write buffer, but these
  46. * things tend to be mildly unpredictable...should check this on the
  47. * performance model
  48. *
  49. * We prefetch 4 lines ahead. We're also "cheating" slightly here...
  50. * since we know we're on an SB1, we force the assembler to take
  51. * 64-bit operands to speed things up
  52. */
  53. __asm__ __volatile__(
  54. " .set push \n"
  55. " .set mips4 \n"
  56. " .set noreorder \n"
  57. #ifdef CONFIG_CPU_HAS_PREFETCH
  58. " daddiu %0, %0, 128 \n"
  59. " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%0) \n" /* Prefetch the first 4 lines */
  60. " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%0) \n"
  61. " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%0) \n"
  62. " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n"
  63. "1: sd $0, -128(%0) \n" /* Throw out a cacheline of 0's */
  64. " sd $0, -120(%0) \n"
  65. " sd $0, -112(%0) \n"
  66. " sd $0, -104(%0) \n"
  67. " daddiu %0, %0, 32 \n"
  68. " bnel %0, %1, 1b \n"
  69. " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n"
  70. " daddiu %0, %0, -128 \n"
  71. #endif
  72. " sd $0, 0(%0) \n" /* Throw out a cacheline of 0's */
  73. "1: sd $0, 8(%0) \n"
  74. " sd $0, 16(%0) \n"
  75. " sd $0, 24(%0) \n"
  76. " daddiu %0, %0, 32 \n"
  77. " bnel %0, %1, 1b \n"
  78. " sd $0, 0(%0) \n"
  79. " .set pop \n"
  80. : "+r" (addr)
  81. : "r" (end)
  82. : "memory");
  83. }
  84. static inline void copy_page_cpu(void *to, void *from)
  85. {
  86. unsigned char *src = (unsigned char *)from;
  87. unsigned char *dst = (unsigned char *)to;
  88. unsigned char *end = src + PAGE_SIZE;
  89. /*
  90. * The pref's used here are using "streaming" hints, which cause the
  91. * copied data to be kicked out of the cache sooner. A page copy often
  92. * ends up copying a lot more data than is commonly used, so this seems
  93. * to make sense in terms of reducing cache pollution, but I've no real
  94. * performance data to back this up
  95. */
  96. __asm__ __volatile__(
  97. " .set push \n"
  98. " .set mips4 \n"
  99. " .set noreorder \n"
  100. #ifdef CONFIG_CPU_HAS_PREFETCH
  101. " daddiu %0, %0, 128 \n"
  102. " daddiu %1, %1, 128 \n"
  103. " pref " SB1_PREF_LOAD_STREAMED_HINT ", -128(%0)\n" /* Prefetch the first 4 lines */
  104. " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n"
  105. " pref " SB1_PREF_LOAD_STREAMED_HINT ", -96(%0)\n"
  106. " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%1)\n"
  107. " pref " SB1_PREF_LOAD_STREAMED_HINT ", -64(%0)\n"
  108. " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%1)\n"
  109. " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n"
  110. "1: pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%1)\n"
  111. # ifdef CONFIG_64BIT
  112. " ld $8, -128(%0) \n" /* Block copy a cacheline */
  113. " ld $9, -120(%0) \n"
  114. " ld $10, -112(%0) \n"
  115. " ld $11, -104(%0) \n"
  116. " sd $8, -128(%1) \n"
  117. " sd $9, -120(%1) \n"
  118. " sd $10, -112(%1) \n"
  119. " sd $11, -104(%1) \n"
  120. # else
  121. " lw $2, -128(%0) \n" /* Block copy a cacheline */
  122. " lw $3, -124(%0) \n"
  123. " lw $6, -120(%0) \n"
  124. " lw $7, -116(%0) \n"
  125. " lw $8, -112(%0) \n"
  126. " lw $9, -108(%0) \n"
  127. " lw $10, -104(%0) \n"
  128. " lw $11, -100(%0) \n"
  129. " sw $2, -128(%1) \n"
  130. " sw $3, -124(%1) \n"
  131. " sw $6, -120(%1) \n"
  132. " sw $7, -116(%1) \n"
  133. " sw $8, -112(%1) \n"
  134. " sw $9, -108(%1) \n"
  135. " sw $10, -104(%1) \n"
  136. " sw $11, -100(%1) \n"
  137. # endif
  138. " daddiu %0, %0, 32 \n"
  139. " daddiu %1, %1, 32 \n"
  140. " bnel %0, %2, 1b \n"
  141. " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n"
  142. " daddiu %0, %0, -128 \n"
  143. " daddiu %1, %1, -128 \n"
  144. #endif
  145. #ifdef CONFIG_64BIT
  146. " ld $8, 0(%0) \n" /* Block copy a cacheline */
  147. "1: ld $9, 8(%0) \n"
  148. " ld $10, 16(%0) \n"
  149. " ld $11, 24(%0) \n"
  150. " sd $8, 0(%1) \n"
  151. " sd $9, 8(%1) \n"
  152. " sd $10, 16(%1) \n"
  153. " sd $11, 24(%1) \n"
  154. #else
  155. " lw $2, 0(%0) \n" /* Block copy a cacheline */
  156. "1: lw $3, 4(%0) \n"
  157. " lw $6, 8(%0) \n"
  158. " lw $7, 12(%0) \n"
  159. " lw $8, 16(%0) \n"
  160. " lw $9, 20(%0) \n"
  161. " lw $10, 24(%0) \n"
  162. " lw $11, 28(%0) \n"
  163. " sw $2, 0(%1) \n"
  164. " sw $3, 4(%1) \n"
  165. " sw $6, 8(%1) \n"
  166. " sw $7, 12(%1) \n"
  167. " sw $8, 16(%1) \n"
  168. " sw $9, 20(%1) \n"
  169. " sw $10, 24(%1) \n"
  170. " sw $11, 28(%1) \n"
  171. #endif
  172. " daddiu %0, %0, 32 \n"
  173. " daddiu %1, %1, 32 \n"
  174. " bnel %0, %2, 1b \n"
  175. #ifdef CONFIG_64BIT
  176. " ld $8, 0(%0) \n"
  177. #else
  178. " lw $2, 0(%0) \n"
  179. #endif
  180. " .set pop \n"
  181. : "+r" (src), "+r" (dst)
  182. : "r" (end)
  183. #ifdef CONFIG_64BIT
  184. : "$8","$9","$10","$11","memory");
  185. #else
  186. : "$2","$3","$6","$7","$8","$9","$10","$11","memory");
  187. #endif
  188. }
  189. #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
  190. /*
  191. * Pad descriptors to cacheline, since each is exclusively owned by a
  192. * particular CPU.
  193. */
  194. typedef struct dmadscr_s {
  195. u64 dscr_a;
  196. u64 dscr_b;
  197. u64 pad_a;
  198. u64 pad_b;
  199. } dmadscr_t;
  200. static dmadscr_t page_descr[NR_CPUS] __attribute__((aligned(SMP_CACHE_BYTES)));
  201. void sb1_dma_init(void)
  202. {
  203. int cpu = smp_processor_id();
  204. u64 base_val = CPHYSADDR(&page_descr[cpu]) | V_DM_DSCR_BASE_RINGSZ(1);
  205. bus_writeq(base_val,
  206. (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
  207. bus_writeq(base_val | M_DM_DSCR_BASE_RESET,
  208. (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
  209. bus_writeq(base_val | M_DM_DSCR_BASE_ENABL,
  210. (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
  211. }
  212. void clear_page(void *page)
  213. {
  214. int cpu = smp_processor_id();
  215. /* if the page is above Kseg0, use old way */
  216. if ((long)KSEGX(page) != (long)CKSEG0)
  217. return clear_page_cpu(page);
  218. page_descr[cpu].dscr_a = CPHYSADDR(page) | M_DM_DSCRA_ZERO_MEM | M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
  219. page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
  220. bus_writeq(1, (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
  221. /*
  222. * Don't really want to do it this way, but there's no
  223. * reliable way to delay completion detection.
  224. */
  225. while (!(bus_readq((void *)(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)) &
  226. M_DM_DSCR_BASE_INTERRUPT))))
  227. ;
  228. bus_readq((void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
  229. }
  230. void copy_page(void *to, void *from)
  231. {
  232. unsigned long from_phys = CPHYSADDR(from);
  233. unsigned long to_phys = CPHYSADDR(to);
  234. int cpu = smp_processor_id();
  235. /* if either page is above Kseg0, use old way */
  236. if ((long)KSEGX(to) != (long)CKSEG0
  237. || (long)KSEGX(from) != (long)CKSEG0)
  238. return copy_page_cpu(to, from);
  239. page_descr[cpu].dscr_a = CPHYSADDR(to_phys) | M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
  240. page_descr[cpu].dscr_b = CPHYSADDR(from_phys) | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
  241. bus_writeq(1, (void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
  242. /*
  243. * Don't really want to do it this way, but there's no
  244. * reliable way to delay completion detection.
  245. */
  246. while (!(bus_readq((void *)(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)) &
  247. M_DM_DSCR_BASE_INTERRUPT))))
  248. ;
  249. bus_readq((void *)IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
  250. }
  251. #else /* !CONFIG_SIBYTE_DMA_PAGEOPS */
  252. void clear_page(void *page)
  253. {
  254. return clear_page_cpu(page);
  255. }
  256. void copy_page(void *to, void *from)
  257. {
  258. return copy_page_cpu(to, from);
  259. }
  260. #endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */
/* Export whichever clear_page()/copy_page() variant was built above. */
EXPORT_SYMBOL(clear_page);
EXPORT_SYMBOL(copy_page);