smp.c 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. /*
  2. * SMP Support
  3. *
  4. * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
  5. * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
  6. *
  7. * Lots of stuff stolen from arch/alpha/kernel/smp.c
  8. *
  9. * 01/05/16 Rohit Seth <rohit.seth@intel.com> IA64-SMP functions. Reorganized
  10. * the existing code (on the lines of x86 port).
  11. * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy
  12. * calibration on each CPU.
  13. * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id
  14. * 00/03/31 Rohit Seth <rohit.seth@intel.com> Fixes for Bootstrap Processor
  15. * & cpu_online_map now gets done here (instead of setup.c)
  16. * 99/10/05 davidm Update to bring it in sync with new command-line processing
  17. * scheme.
  18. * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and
  19. * smp_call_function_single to resend IPI on timeouts
  20. */
  21. #include <linux/module.h>
  22. #include <linux/kernel.h>
  23. #include <linux/sched.h>
  24. #include <linux/init.h>
  25. #include <linux/interrupt.h>
  26. #include <linux/smp.h>
  27. #include <linux/kernel_stat.h>
  28. #include <linux/mm.h>
  29. #include <linux/cache.h>
  30. #include <linux/delay.h>
  31. #include <linux/efi.h>
  32. #include <linux/bitops.h>
  33. #include <linux/kexec.h>
  34. #include <asm/atomic.h>
  35. #include <asm/current.h>
  36. #include <asm/delay.h>
  37. #include <asm/machvec.h>
  38. #include <asm/io.h>
  39. #include <asm/irq.h>
  40. #include <asm/page.h>
  41. #include <asm/pgalloc.h>
  42. #include <asm/pgtable.h>
  43. #include <asm/processor.h>
  44. #include <asm/ptrace.h>
  45. #include <asm/sal.h>
  46. #include <asm/system.h>
  47. #include <asm/tlbflush.h>
  48. #include <asm/unistd.h>
  49. #include <asm/mca.h>
  50. /*
  51. * Note: alignment of 4 entries/cacheline was empirically determined
  52. * to be a good tradeoff between hot cachelines & spreading the array
  53. * across too many cacheline.
  54. */
  55. static struct local_tlb_flush_counts {
  56. unsigned int count;
  57. } __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];
  58. static DEFINE_PER_CPU(unsigned short, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned;
  59. #define IPI_CALL_FUNC 0
  60. #define IPI_CPU_STOP 1
  61. #define IPI_CALL_FUNC_SINGLE 2
  62. #define IPI_KDUMP_CPU_STOP 3
  63. /* This needs to be cacheline aligned because it is written to by *other* CPUs. */
  64. static DEFINE_PER_CPU_SHARED_ALIGNED(u64, ipi_operation);
  65. extern void cpu_halt (void);
  66. static void
  67. stop_this_cpu(void)
  68. {
  69. /*
  70. * Remove this CPU:
  71. */
  72. cpu_clear(smp_processor_id(), cpu_online_map);
  73. max_xtp();
  74. local_irq_disable();
  75. cpu_halt();
  76. }
  77. void
  78. cpu_die(void)
  79. {
  80. max_xtp();
  81. local_irq_disable();
  82. cpu_halt();
  83. /* Should never be here */
  84. BUG();
  85. for (;;);
  86. }
  87. irqreturn_t
  88. handle_IPI (int irq, void *dev_id)
  89. {
  90. int this_cpu = get_cpu();
  91. unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
  92. unsigned long ops;
  93. mb(); /* Order interrupt and bit testing. */
  94. while ((ops = xchg(pending_ipis, 0)) != 0) {
  95. mb(); /* Order bit clearing and data access. */
  96. do {
  97. unsigned long which;
  98. which = ffz(~ops);
  99. ops &= ~(1 << which);
  100. switch (which) {
  101. case IPI_CPU_STOP:
  102. stop_this_cpu();
  103. break;
  104. case IPI_CALL_FUNC:
  105. generic_smp_call_function_interrupt();
  106. break;
  107. case IPI_CALL_FUNC_SINGLE:
  108. generic_smp_call_function_single_interrupt();
  109. break;
  110. #ifdef CONFIG_KEXEC
  111. case IPI_KDUMP_CPU_STOP:
  112. unw_init_running(kdump_cpu_freeze, NULL);
  113. break;
  114. #endif
  115. default:
  116. printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n",
  117. this_cpu, which);
  118. break;
  119. }
  120. } while (ops);
  121. mb(); /* Order data access and bit testing. */
  122. }
  123. put_cpu();
  124. return IRQ_HANDLED;
  125. }
  126. /*
  127. * Called with preemption disabled.
  128. */
  129. static inline void
  130. send_IPI_single (int dest_cpu, int op)
  131. {
  132. set_bit(op, &per_cpu(ipi_operation, dest_cpu));
  133. platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
  134. }
  135. /*
  136. * Called with preemption disabled.
  137. */
  138. static inline void
  139. send_IPI_allbutself (int op)
  140. {
  141. unsigned int i;
  142. for_each_online_cpu(i) {
  143. if (i != smp_processor_id())
  144. send_IPI_single(i, op);
  145. }
  146. }
  147. /*
  148. * Called with preemption disabled.
  149. */
  150. static inline void
  151. send_IPI_mask(const struct cpumask *mask, int op)
  152. {
  153. unsigned int cpu;
  154. for_each_cpu(cpu, mask) {
  155. send_IPI_single(cpu, op);
  156. }
  157. }
  158. /*
  159. * Called with preemption disabled.
  160. */
  161. static inline void
  162. send_IPI_all (int op)
  163. {
  164. int i;
  165. for_each_online_cpu(i) {
  166. send_IPI_single(i, op);
  167. }
  168. }
  169. /*
  170. * Called with preemption disabled.
  171. */
  172. static inline void
  173. send_IPI_self (int op)
  174. {
  175. send_IPI_single(smp_processor_id(), op);
  176. }
  177. #ifdef CONFIG_KEXEC
  178. void
  179. kdump_smp_send_stop(void)
  180. {
  181. send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
  182. }
  183. void
  184. kdump_smp_send_init(void)
  185. {
  186. unsigned int cpu, self_cpu;
  187. self_cpu = smp_processor_id();
  188. for_each_online_cpu(cpu) {
  189. if (cpu != self_cpu) {
  190. if(kdump_status[cpu] == 0)
  191. platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
  192. }
  193. }
  194. }
  195. #endif
  196. /*
  197. * Called with preemption disabled.
  198. */
  199. void
  200. smp_send_reschedule (int cpu)
  201. {
  202. platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
  203. }
  204. /*
  205. * Called with preemption disabled.
  206. */
  207. static void
  208. smp_send_local_flush_tlb (int cpu)
  209. {
  210. platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0);
  211. }
  212. void
  213. smp_local_flush_tlb(void)
  214. {
  215. /*
  216. * Use atomic ops. Otherwise, the load/increment/store sequence from
  217. * a "++" operation can have the line stolen between the load & store.
  218. * The overhead of the atomic op in negligible in this case & offers
  219. * significant benefit for the brief periods where lots of cpus
  220. * are simultaneously flushing TLBs.
  221. */
  222. ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq);
  223. local_flush_tlb_all();
  224. }
  225. #define FLUSH_DELAY 5 /* Usec backoff to eliminate excessive cacheline bouncing */
  226. void
  227. smp_flush_tlb_cpumask(cpumask_t xcpumask)
  228. {
  229. unsigned short *counts = __ia64_per_cpu_var(shadow_flush_counts);
  230. cpumask_t cpumask = xcpumask;
  231. int mycpu, cpu, flush_mycpu = 0;
  232. preempt_disable();
  233. mycpu = smp_processor_id();
  234. for_each_cpu_mask(cpu, cpumask)
  235. counts[cpu] = local_tlb_flush_counts[cpu].count & 0xffff;
  236. mb();
  237. for_each_cpu_mask(cpu, cpumask) {
  238. if (cpu == mycpu)
  239. flush_mycpu = 1;
  240. else
  241. smp_send_local_flush_tlb(cpu);
  242. }
  243. if (flush_mycpu)
  244. smp_local_flush_tlb();
  245. for_each_cpu_mask(cpu, cpumask)
  246. while(counts[cpu] == (local_tlb_flush_counts[cpu].count & 0xffff))
  247. udelay(FLUSH_DELAY);
  248. preempt_enable();
  249. }
  250. void
  251. smp_flush_tlb_all (void)
  252. {
  253. on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1);
  254. }
  255. void
  256. smp_flush_tlb_mm (struct mm_struct *mm)
  257. {
  258. preempt_disable();
  259. /* this happens for the common case of a single-threaded fork(): */
  260. if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
  261. {
  262. local_finish_flush_tlb_mm(mm);
  263. preempt_enable();
  264. return;
  265. }
  266. preempt_enable();
  267. /*
  268. * We could optimize this further by using mm->cpu_vm_mask to track which CPUs
  269. * have been running in the address space. It's not clear that this is worth the
  270. * trouble though: to avoid races, we have to raise the IPI on the target CPU
  271. * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is
  272. * rather trivial.
  273. */
  274. on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
  275. }
  276. void arch_send_call_function_single_ipi(int cpu)
  277. {
  278. send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
  279. }
  280. void arch_send_call_function_ipi_mask(const struct cpumask *mask)
  281. {
  282. send_IPI_mask(mask, IPI_CALL_FUNC);
  283. }
  284. /*
  285. * this function calls the 'stop' function on all other CPUs in the system.
  286. */
  287. void
  288. smp_send_stop (void)
  289. {
  290. send_IPI_allbutself(IPI_CPU_STOP);
  291. }
  292. int
  293. setup_profiling_timer (unsigned int multiplier)
  294. {
  295. return -EINVAL;
  296. }