smp.c
/*
 * SMP Support
 *
 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
 * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * Lots of stuff stolen from arch/alpha/kernel/smp.c
 *
 * 01/05/16 Rohit Seth <rohit.seth@intel.com>  IA64-SMP functions. Reorganized
 *            the existing code (on the lines of x86 port).
 * 00/09/11 David Mosberger <davidm@hpl.hp.com>  Do loops_per_jiffy
 *            calibration on each CPU.
 * 00/08/23 Asit Mallick <asit.k.mallick@intel.com>  fixed logical processor id
 * 00/03/31 Rohit Seth <rohit.seth@intel.com>  Fixes for Bootstrap Processor
 *            & cpu_online_map now gets done here (instead of setup.c)
 * 99/10/05 davidm  Update to bring it in sync with new command-line processing
 *            scheme.
 * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and
 *            smp_call_function_single to resend IPI on timeouts
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/delay.h>
#include <linux/efi.h>
#include <linux/bitops.h>
#include <linux/kexec.h>

#include <asm/atomic.h>
#include <asm/current.h>
#include <asm/delay.h>
#include <asm/machvec.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/mca.h>
/*
 * Note: alignment of 4 entries/cacheline was empirically determined
 * to be a good tradeoff between hot cachelines & spreading the array
 * across too many cachelines.
 */
static struct local_tlb_flush_counts {
        unsigned int count;
} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];

static DEFINE_PER_CPU(unsigned int, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned;

/*
 * Structure and data for smp_call_function(). This is designed to minimise static memory
 * requirements. It also looks cleaner.
 */
static __cacheline_aligned DEFINE_SPINLOCK(call_lock);

struct call_data_struct {
        void (*func) (void *info);
        void *info;
        long wait;
        atomic_t started;
        atomic_t finished;
};

static volatile struct call_data_struct *call_data;
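
/*
 * call_data points at the caller's (stack-resident) call_data_struct while a
 * smp_call_function()/smp_call_function_single() request is in flight, and
 * call_lock serializes requests.  Each target CPU increments 'started' once it
 * has copied func/info/wait out of the structure (after which the caller may
 * tear it down unless 'wait' is set), and increments 'finished' after func
 * returns when 'wait' is set.
 */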

#define IPI_CALL_FUNC           0
#define IPI_CPU_STOP            1
#define IPI_KDUMP_CPU_STOP      3

/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
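
/*
 * ipi_operation is a per-CPU bitmask of pending IPI_* operations: senders set
 * the requested bit with set_bit() and raise IA64_IPI_VECTOR through
 * platform_send_ipi(); handle_IPI() drains the word atomically with xchg()
 * and dispatches every bit it finds, so several operations may be serviced by
 * a single interrupt.
 */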

extern void cpu_halt (void);

void
lock_ipi_calllock(void)
{
        spin_lock_irq(&call_lock);
}

void
unlock_ipi_calllock(void)
{
        spin_unlock_irq(&call_lock);
}

static void
stop_this_cpu (void)
{
        /*
         * Remove this CPU:
         */
        cpu_clear(smp_processor_id(), cpu_online_map);
        max_xtp();
        local_irq_disable();
        cpu_halt();
}

void
cpu_die(void)
{
        max_xtp();
        local_irq_disable();
        cpu_halt();
        /* Should never be here */
        BUG();
        for (;;);
}

irqreturn_t
handle_IPI (int irq, void *dev_id)
{
        int this_cpu = get_cpu();
        unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
        unsigned long ops;

        mb();   /* Order interrupt and bit testing. */
        while ((ops = xchg(pending_ipis, 0)) != 0) {
                mb();   /* Order bit clearing and data access. */
                do {
                        unsigned long which;

                        which = ffz(~ops);
                        ops &= ~(1 << which);

                        switch (which) {
                        case IPI_CALL_FUNC:
                        {
                                struct call_data_struct *data;
                                void (*func)(void *info);
                                void *info;
                                int wait;

                                /* release the 'pointer lock' */
                                data = (struct call_data_struct *) call_data;
                                func = data->func;
                                info = data->info;
                                wait = data->wait;

                                mb();
                                atomic_inc(&data->started);
                                /*
                                 * At this point the structure may be gone unless
                                 * wait is true.
                                 */
                                (*func)(info);

                                /* Notify the sending CPU that the task is done. */
                                mb();
                                if (wait)
                                        atomic_inc(&data->finished);
                        }
                        break;

                        case IPI_CPU_STOP:
                                stop_this_cpu();
                                break;
#ifdef CONFIG_KEXEC
                        case IPI_KDUMP_CPU_STOP:
                                unw_init_running(kdump_cpu_freeze, NULL);
                                break;
#endif
                        default:
                                printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
                                break;
                        }
                } while (ops);
                mb();   /* Order data access and bit testing. */
        }
        put_cpu();
        return IRQ_HANDLED;
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_single (int dest_cpu, int op)
{
        set_bit(op, &per_cpu(ipi_operation, dest_cpu));
        platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_allbutself (int op)
{
        unsigned int i;

        for_each_online_cpu(i) {
                if (i != smp_processor_id())
                        send_IPI_single(i, op);
        }
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_all (int op)
{
        int i;

        for_each_online_cpu(i) {
                send_IPI_single(i, op);
        }
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_self (int op)
{
        send_IPI_single(smp_processor_id(), op);
}

#ifdef CONFIG_KEXEC
void
kdump_smp_send_stop(void)
{
        send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
}

void
kdump_smp_send_init(void)
{
        unsigned int cpu, self_cpu;

        self_cpu = smp_processor_id();
        for_each_online_cpu(cpu) {
                if (cpu != self_cpu) {
                        if (kdump_status[cpu] == 0)
                                platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
                }
        }
}
#endif

/*
 * Called with preemption disabled.
 */
void
smp_send_reschedule (int cpu)
{
        platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
}

/*
 * Called with preemption disabled.
 */
static void
smp_send_local_flush_tlb (int cpu)
{
        platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0);
}

void
smp_local_flush_tlb(void)
{
        /*
         * Use atomic ops. Otherwise, the load/increment/store sequence from
         * a "++" operation can have the line stolen between the load & store.
         * The overhead of the atomic op is negligible in this case & offers
         * significant benefit for the brief periods where lots of cpus
         * are simultaneously flushing TLBs.
         */
        ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq);
        local_flush_tlb_all();
}
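
/*
 * smp_flush_tlb_cpumask() below avoids the call_data handshake used by
 * smp_call_function(): it snapshots each target CPU's local_tlb_flush_counts
 * value, sends IA64_IPI_LOCAL_TLB_FLUSH to the remote CPUs, and then spins
 * (backing off by FLUSH_DELAY microseconds) until every target has advanced
 * its counter in smp_local_flush_tlb(), i.e. until every requested flush has
 * actually run.
 */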

#define FLUSH_DELAY     5 /* Usec backoff to eliminate excessive cacheline bouncing */

void
smp_flush_tlb_cpumask(cpumask_t xcpumask)
{
        unsigned int *counts = __ia64_per_cpu_var(shadow_flush_counts);
        cpumask_t cpumask = xcpumask;
        int mycpu, cpu, flush_mycpu = 0;

        preempt_disable();
        mycpu = smp_processor_id();

        for_each_cpu_mask(cpu, cpumask)
                counts[cpu] = local_tlb_flush_counts[cpu].count;

        mb();
        for_each_cpu_mask(cpu, cpumask) {
                if (cpu == mycpu)
                        flush_mycpu = 1;
                else
                        smp_send_local_flush_tlb(cpu);
        }

        if (flush_mycpu)
                smp_local_flush_tlb();

        for_each_cpu_mask(cpu, cpumask)
                while (counts[cpu] == local_tlb_flush_counts[cpu].count)
                        udelay(FLUSH_DELAY);

        preempt_enable();
}

void
smp_flush_tlb_all (void)
{
        on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
}

void
smp_flush_tlb_mm (struct mm_struct *mm)
{
        preempt_disable();
        /* this happens for the common case of a single-threaded fork(): */
        if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
        {
                local_finish_flush_tlb_mm(mm);
                preempt_enable();
                return;
        }

        preempt_enable();
        /*
         * We could optimize this further by using mm->cpu_vm_mask to track which CPUs
         * have been running in the address space. It's not clear that this is worth the
         * trouble though: to avoid races, we have to raise the IPI on the target CPU
         * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is
         * rather trivial.
         */
        on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
}

/*
 * Run a function on another CPU.
 *  <func>       The function to run. This must be fast and non-blocking.
 *  <info>       An arbitrary pointer to pass to the function.
 *  <nonatomic>  Currently unused.
 *  <wait>       If true, wait until the function has completed on the other CPU.
 *  [RETURNS]    0 on success, else a negative status code.
 *
 * Does not return until the remote CPU is nearly ready to execute <func>,
 * is executing it, or has already executed it.
 */
int
smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
                          int wait)
{
        struct call_data_struct data;
        int cpus = 1;
        int me = get_cpu(); /* prevent preemption and reschedule on another processor */

        if (cpuid == me) {
                printk(KERN_INFO "%s: trying to call self\n", __FUNCTION__);
                put_cpu();
                return -EBUSY;
        }

        data.func = func;
        data.info = info;
        atomic_set(&data.started, 0);
        data.wait = wait;
        if (wait)
                atomic_set(&data.finished, 0);

        spin_lock_bh(&call_lock);

        call_data = &data;
        mb();   /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
        send_IPI_single(cpuid, IPI_CALL_FUNC);

        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
                cpu_relax();

        if (wait)
                while (atomic_read(&data.finished) != cpus)
                        cpu_relax();
        call_data = NULL;

        spin_unlock_bh(&call_lock);
        put_cpu();
        return 0;
}
EXPORT_SYMBOL(smp_call_function_single);
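
/*
 * Illustrative use of smp_call_function_single() (a hypothetical caller,
 * not part of this file): run a fast, non-blocking function on CPU 1 and
 * wait for it to complete.  "remote_cpu_id" and "id" are made-up names
 * for the sketch.
 *
 *      static void remote_cpu_id(void *info)
 *      {
 *              *(int *)info = smp_processor_id();
 *      }
 *
 *      int id;
 *      if (smp_call_function_single(1, remote_cpu_id, &id, 0, 1) == 0)
 *              printk(KERN_INFO "IPI ran on CPU %d\n", id);
 */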

/*
 * This function sends a 'generic call function' IPI to all other CPUs
 * in the system.
 */

/*
 * [SUMMARY]     Run a function on all other CPUs.
 *  <func>       The function to run. This must be fast and non-blocking.
 *  <info>       An arbitrary pointer to pass to the function.
 *  <nonatomic>  Currently unused.
 *  <wait>       If true, wait (atomically) until the function has completed on the other CPUs.
 *  [RETURNS]    0 on success, else a negative status code.
 *
 * Does not return until the remote CPUs are nearly ready to execute <func>,
 * are executing it, or have already executed it.
 *
 * You must not call this function with interrupts disabled, from a
 * hardware interrupt handler, or from a bottom half handler.
 */
int
smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
{
        struct call_data_struct data;
        int cpus;

        spin_lock(&call_lock);
        cpus = num_online_cpus() - 1;
        if (!cpus) {
                spin_unlock(&call_lock);
                return 0;
        }

        /* Can deadlock when called with interrupts disabled */
        WARN_ON(irqs_disabled());

        data.func = func;
        data.info = info;
        atomic_set(&data.started, 0);
        data.wait = wait;
        if (wait)
                atomic_set(&data.finished, 0);

        call_data = &data;
        mb();   /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
        send_IPI_allbutself(IPI_CALL_FUNC);

        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
                cpu_relax();

        if (wait)
                while (atomic_read(&data.finished) != cpus)
                        cpu_relax();
        call_data = NULL;

        spin_unlock(&call_lock);
        return 0;
}
EXPORT_SYMBOL(smp_call_function);
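
/*
 * Illustrative use of smp_call_function() (a hypothetical caller, not part
 * of this file): broadcast a fast, non-blocking function to every other
 * online CPU and wait for all of them to finish.  "count_online_ipis" and
 * "ipi_count" are made-up names for the sketch.
 *
 *      static atomic_t ipi_count = ATOMIC_INIT(0);
 *
 *      static void count_online_ipis(void *info)
 *      {
 *              atomic_inc((atomic_t *)info);
 *      }
 *
 *      smp_call_function(count_online_ipis, &ipi_count, 0, 1);
 *      printk(KERN_INFO "%d other CPUs responded\n", atomic_read(&ipi_count));
 */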

/*
 * this function calls the 'stop' function on all other CPUs in the system.
 */
void
smp_send_stop (void)
{
        send_IPI_allbutself(IPI_CPU_STOP);
}

int __init
setup_profiling_timer (unsigned int multiplier)
{
        return -EINVAL;
}