
/*
 * SMP Support
 *
 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
 * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * Lots of stuff stolen from arch/alpha/kernel/smp.c
 *
 * 01/05/16 Rohit Seth <rohit.seth@intel.com>	IA64-SMP functions. Reorganized
 * the existing code (on the lines of x86 port).
 * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy
 * calibration on each CPU.
 * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id
 * 00/03/31 Rohit Seth <rohit.seth@intel.com>	Fixes for Bootstrap Processor
 * & cpu_online_map now gets done here (instead of setup.c)
 * 99/10/05 davidm	Update to bring it in sync with new command-line processing
 * scheme.
 * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and
 * smp_call_function_single to resend IPI on timeouts
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/delay.h>
#include <linux/efi.h>
#include <linux/bitops.h>
#include <linux/kexec.h>

#include <asm/atomic.h>
#include <asm/current.h>
#include <asm/delay.h>
#include <asm/machvec.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/mca.h>
/*
 * Note: alignment of 4 entries/cacheline was empirically determined
 * to be a good tradeoff between hot cachelines & spreading the array
 * across too many cachelines.
 */
static struct local_tlb_flush_counts {
        unsigned int count;
} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];

static DEFINE_PER_CPU(unsigned int, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned;
/*
 * Structure and data for smp_call_function(). This is designed to minimise static memory
 * requirements. It also looks cleaner.
 */
static __cacheline_aligned DEFINE_SPINLOCK(call_lock);

struct call_data_struct {
        void (*func) (void *info);
        void *info;
        long wait;
        atomic_t started;
        atomic_t finished;
};

static volatile struct call_data_struct *call_data;
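
/*
 * call_data is a single shared "mailbox", serialized by call_lock: a sender
 * fills in a call_data_struct (typically on its own stack), publishes it via
 * call_data, and raises IPI_CALL_FUNC.  Each target copies out func/info/wait,
 * increments ->started (after which the sender may reuse the structure unless
 * wait was set), runs the function, and increments ->finished if the sender
 * asked to wait.
 */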
#define IPI_CALL_FUNC		0
#define IPI_CPU_STOP		1
#define IPI_KDUMP_CPU_STOP	3

/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
static DEFINE_PER_CPU_SHARED_ALIGNED(u64, ipi_operation);
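
/*
 * The per-CPU ipi_operation word above is a bitmask of pending IPI_*
 * operations: send_IPI_single() sets the bit for the requested operation and
 * then delivers IA64_IPI_VECTOR; handle_IPI() atomically swaps the mask to
 * zero with xchg() and services each set bit in turn.
 */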
extern void cpu_halt (void);

void
lock_ipi_calllock(void)
{
        spin_lock_irq(&call_lock);
}

void
unlock_ipi_calllock(void)
{
        spin_unlock_irq(&call_lock);
}

static void
stop_this_cpu (void)
{
        /*
         * Remove this CPU:
         */
        cpu_clear(smp_processor_id(), cpu_online_map);
        max_xtp();
        local_irq_disable();
        cpu_halt();
}

void
cpu_die(void)
{
        max_xtp();
        local_irq_disable();
        cpu_halt();
        /* Should never be here */
        BUG();
        for (;;);
}
irqreturn_t
handle_IPI (int irq, void *dev_id)
{
        int this_cpu = get_cpu();
        unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
        unsigned long ops;

        mb();   /* Order interrupt and bit testing. */
        while ((ops = xchg(pending_ipis, 0)) != 0) {
                mb();   /* Order bit clearing and data access. */
                do {
                        unsigned long which;

                        which = ffz(~ops);
                        ops &= ~(1 << which);

                        switch (which) {
                        case IPI_CALL_FUNC:
                        {
                                struct call_data_struct *data;
                                void (*func)(void *info);
                                void *info;
                                int wait;

                                /* release the 'pointer lock' */
                                data = (struct call_data_struct *) call_data;
                                func = data->func;
                                info = data->info;
                                wait = data->wait;

                                mb();
                                atomic_inc(&data->started);
                                /*
                                 * At this point the structure may be gone unless
                                 * wait is true.
                                 */
                                (*func)(info);

                                /* Notify the sending CPU that the task is done. */
                                mb();
                                if (wait)
                                        atomic_inc(&data->finished);
                        }
                        break;

                        case IPI_CPU_STOP:
                                stop_this_cpu();
                                break;
#ifdef CONFIG_KEXEC
                        case IPI_KDUMP_CPU_STOP:
                                unw_init_running(kdump_cpu_freeze, NULL);
                                break;
#endif
                        default:
                                printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
                                break;
                        }
                } while (ops);
                mb();   /* Order data access and bit testing. */
        }
        put_cpu();
        return IRQ_HANDLED;
}
/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_single (int dest_cpu, int op)
{
        set_bit(op, &per_cpu(ipi_operation, dest_cpu));
        platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_allbutself (int op)
{
        unsigned int i;

        for_each_online_cpu(i) {
                if (i != smp_processor_id())
                        send_IPI_single(i, op);
        }
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_mask(cpumask_t mask, int op)
{
        unsigned int cpu;

        for_each_cpu_mask(cpu, mask) {
                send_IPI_single(cpu, op);
        }
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_all (int op)
{
        int i;

        for_each_online_cpu(i) {
                send_IPI_single(i, op);
        }
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_self (int op)
{
        send_IPI_single(smp_processor_id(), op);
}
#ifdef CONFIG_KEXEC
void
kdump_smp_send_stop(void)
{
        send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
}

void
kdump_smp_send_init(void)
{
        unsigned int cpu, self_cpu;

        self_cpu = smp_processor_id();
        for_each_online_cpu(cpu) {
                if (cpu != self_cpu) {
                        if (kdump_status[cpu] == 0)
                                platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
                }
        }
}
#endif
/*
 * Called with preemption disabled.
 */
void
smp_send_reschedule (int cpu)
{
        platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
}

/*
 * Called with preemption disabled.
 */
static void
smp_send_local_flush_tlb (int cpu)
{
        platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0);
}
void
smp_local_flush_tlb(void)
{
        /*
         * Use atomic ops. Otherwise, the load/increment/store sequence from
         * a "++" operation can have the line stolen between the load & store.
         * The overhead of the atomic op is negligible in this case & offers
         * significant benefit for the brief periods where lots of cpus
         * are simultaneously flushing TLBs.
         */
        ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq);
        local_flush_tlb_all();
}

#define FLUSH_DELAY	5 /* Usec backoff to eliminate excessive cacheline bouncing */
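
/*
 * smp_flush_tlb_cpumask() below snapshots each target CPU's
 * local_tlb_flush_counts entry into this CPU's shadow_flush_counts array,
 * sends IA64_IPI_LOCAL_TLB_FLUSH to every remote CPU in the mask (flushing
 * locally if this CPU is included), and then spins until every snapshot goes
 * stale, i.e. until each target has incremented its counter in
 * smp_local_flush_tlb().  The FLUSH_DELAY backoff keeps that polling loop
 * from bouncing the counter cachelines too hard.
 */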
void
smp_flush_tlb_cpumask(cpumask_t xcpumask)
{
        unsigned int *counts = __ia64_per_cpu_var(shadow_flush_counts);
        cpumask_t cpumask = xcpumask;
        int mycpu, cpu, flush_mycpu = 0;

        preempt_disable();
        mycpu = smp_processor_id();

        for_each_cpu_mask(cpu, cpumask)
                counts[cpu] = local_tlb_flush_counts[cpu].count;

        mb();
        for_each_cpu_mask(cpu, cpumask) {
                if (cpu == mycpu)
                        flush_mycpu = 1;
                else
                        smp_send_local_flush_tlb(cpu);
        }

        if (flush_mycpu)
                smp_local_flush_tlb();

        for_each_cpu_mask(cpu, cpumask)
                while (counts[cpu] == local_tlb_flush_counts[cpu].count)
                        udelay(FLUSH_DELAY);

        preempt_enable();
}
void
smp_flush_tlb_all (void)
{
        on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
}

void
smp_flush_tlb_mm (struct mm_struct *mm)
{
        preempt_disable();
        /* this happens for the common case of a single-threaded fork(): */
        if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
        {
                local_finish_flush_tlb_mm(mm);
                preempt_enable();
                return;
        }

        preempt_enable();
        /*
         * We could optimize this further by using mm->cpu_vm_mask to track which CPUs
         * have been running in the address space.  It's not clear that this is worth the
         * trouble though: to avoid races, we have to raise the IPI on the target CPU
         * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is
         * rather trivial.
         */
        on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
}
/*
 * Run a function on a specific CPU
 *  <func>	The function to run. This must be fast and non-blocking.
 *  <info>	An arbitrary pointer to pass to the function.
 *  <nonatomic>	Currently unused.
 *  <wait>	If true, wait until function has completed on other CPUs.
 *  [RETURNS]	0 on success, else a negative status code.
 *
 * Does not return until the remote CPU is nearly ready to execute <func>,
 * or is executing it, or has already executed it.
 */
int
smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
                          int wait)
{
        struct call_data_struct data;
        int cpus = 1;
        int me = get_cpu(); /* prevent preemption and reschedule on another processor */

        if (cpuid == me) {
                local_irq_disable();
                func(info);
                local_irq_enable();
                put_cpu();
                return 0;
        }

        data.func = func;
        data.info = info;
        atomic_set(&data.started, 0);
        data.wait = wait;
        if (wait)
                atomic_set(&data.finished, 0);

        spin_lock_bh(&call_lock);

        call_data = &data;
        mb();   /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
        send_IPI_single(cpuid, IPI_CALL_FUNC);

        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
                cpu_relax();

        if (wait)
                while (atomic_read(&data.finished) != cpus)
                        cpu_relax();
        call_data = NULL;

        spin_unlock_bh(&call_lock);
        put_cpu();
        return 0;
}
EXPORT_SYMBOL(smp_call_function_single);
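
/*
 * Illustrative usage sketch (not part of the original file): a caller that
 * wants, say, CPU 2 to flush its local TLB synchronously could do something
 * like the following.  flush_one() is a hypothetical helper; as documented
 * above, it must be fast and non-blocking because it runs from the IPI
 * handler on the target CPU:
 *
 *      static void flush_one(void *unused)
 *      {
 *              local_flush_tlb_all();
 *      }
 *
 *      ...
 *      smp_call_function_single(2, flush_one, NULL, 0, 1);
 */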
/**
 * smp_call_function_mask(): Run a function on a set of other CPUs.
 *  <mask>	The set of cpus to run on.  Must not include the current cpu.
 *  <func>	The function to run. This must be fast and non-blocking.
 *  <info>	An arbitrary pointer to pass to the function.
 *  <wait>	If true, wait (atomically) until function
 *		has completed on other CPUs.
 *
 * Returns 0 on success, else a negative status code.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int smp_call_function_mask(cpumask_t mask,
                           void (*func)(void *), void *info,
                           int wait)
{
        struct call_data_struct data;
        cpumask_t allbutself;
        int cpus;

        spin_lock(&call_lock);
        allbutself = cpu_online_map;
        cpu_clear(smp_processor_id(), allbutself);

        cpus_and(mask, mask, allbutself);
        cpus = cpus_weight(mask);

        if (!cpus) {
                spin_unlock(&call_lock);
                return 0;
        }

        /* Can deadlock when called with interrupts disabled */
        WARN_ON(irqs_disabled());

        data.func = func;
        data.info = info;
        atomic_set(&data.started, 0);
        data.wait = wait;
        if (wait)
                atomic_set(&data.finished, 0);

        call_data = &data;
        mb();   /* ensure store to call_data precedes setting of IPI_CALL_FUNC */

        /* Send a message to other CPUs */
        if (cpus_equal(mask, allbutself))
                send_IPI_allbutself(IPI_CALL_FUNC);
        else
                send_IPI_mask(mask, IPI_CALL_FUNC);

        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
                cpu_relax();

        if (wait)
                while (atomic_read(&data.finished) != cpus)
                        cpu_relax();
        call_data = NULL;

        spin_unlock(&call_lock);
        return 0;
}
EXPORT_SYMBOL(smp_call_function_mask);
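
/*
 * Illustrative usage sketch (not part of the original file): build a cpumask
 * and run a hypothetical helper poke() on (arbitrarily chosen) CPUs 1 and 3,
 * waiting for both to finish.  Per the kernel-doc above, interrupts must be
 * enabled at the call site:
 *
 *      static void poke(void *info)
 *      {
 *              printk(KERN_INFO "poked on CPU %d\n", smp_processor_id());
 *      }
 *
 *      ...
 *      cpumask_t m = CPU_MASK_NONE;
 *      cpu_set(1, m);
 *      cpu_set(3, m);
 *      smp_call_function_mask(m, poke, NULL, 1);
 */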
/*
 * this function sends a 'generic call function' IPI to all other CPUs
 * in the system.
 */

/*
 *  [SUMMARY]	Run a function on all other CPUs.
 *  <func>	The function to run. This must be fast and non-blocking.
 *  <info>	An arbitrary pointer to pass to the function.
 *  <nonatomic>	currently unused.
 *  <wait>	If true, wait (atomically) until function has completed on other CPUs.
 *  [RETURNS]	0 on success, else a negative status code.
 *
 * Does not return until remote CPUs are nearly ready to execute <func>,
 * or are executing it, or have already executed it.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int
smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
{
        struct call_data_struct data;
        int cpus;

        spin_lock(&call_lock);
        cpus = num_online_cpus() - 1;
        if (!cpus) {
                spin_unlock(&call_lock);
                return 0;
        }

        /* Can deadlock when called with interrupts disabled */
        WARN_ON(irqs_disabled());

        data.func = func;
        data.info = info;
        atomic_set(&data.started, 0);
        data.wait = wait;
        if (wait)
                atomic_set(&data.finished, 0);

        call_data = &data;
        mb();   /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
        send_IPI_allbutself(IPI_CALL_FUNC);

        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
                cpu_relax();

        if (wait)
                while (atomic_read(&data.finished) != cpus)
                        cpu_relax();
        call_data = NULL;

        spin_unlock(&call_lock);
        return 0;
}
EXPORT_SYMBOL(smp_call_function);
/*
 * this function calls the 'stop' function on all other CPUs in the system.
 */
void
smp_send_stop (void)
{
        send_IPI_allbutself(IPI_CPU_STOP);
}

int
setup_profiling_timer (unsigned int multiplier)
{
        return -EINVAL;
}