/*
 * SMP Support
 *
 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
 * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * Lots of stuff stolen from arch/alpha/kernel/smp.c
 *
 * 01/05/16 Rohit Seth <rohit.seth@intel.com>	IA64-SMP functions. Reorganized
 *		the existing code (on the lines of x86 port).
 * 00/09/11 David Mosberger <davidm@hpl.hp.com>	Do loops_per_jiffy
 *		calibration on each CPU.
 * 00/08/23 Asit Mallick <asit.k.mallick@intel.com>	fixed logical processor id
 * 00/03/31 Rohit Seth <rohit.seth@intel.com>	Fixes for Bootstrap Processor
 *		& cpu_online_map now gets done here (instead of setup.c)
 * 99/10/05 davidm	Update to bring it in sync with new command-line processing
 *		scheme.
 * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and
 *		smp_call_function_single to resend IPI on timeouts
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mm.h>
#include <linux/cache.h>
#include <linux/delay.h>
#include <linux/efi.h>
#include <linux/bitops.h>
#include <linux/kexec.h>

#include <asm/atomic.h>
#include <asm/current.h>
#include <asm/delay.h>
#include <asm/machvec.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/mca.h>
/*
 * Note: alignment of 4 entries/cacheline was empirically determined
 * to be a good tradeoff between hot cachelines & spreading the array
 * across too many cachelines.
 */
static struct local_tlb_flush_counts {
        unsigned int count;
} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];

static DEFINE_PER_CPU(unsigned int, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned;

/*
 * Structure and data for smp_call_function(). This is designed to minimise static memory
 * requirements. It also looks cleaner.
 */
static __cacheline_aligned DEFINE_SPINLOCK(call_lock);

struct call_data_struct {
        void (*func) (void *info);
        void *info;
        long wait;
        atomic_t started;
        atomic_t finished;
};

static volatile struct call_data_struct *call_data;

#define IPI_CALL_FUNC		0
#define IPI_CPU_STOP		1
#define IPI_KDUMP_CPU_STOP	3

/* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
static DEFINE_PER_CPU_SHARED_ALIGNED(u64, ipi_operation);

extern void cpu_halt (void);
void
lock_ipi_calllock(void)
{
        spin_lock_irq(&call_lock);
}

void
unlock_ipi_calllock(void)
{
        spin_unlock_irq(&call_lock);
}

static inline void
handle_call_data(void)
{
        struct call_data_struct *data;
        void (*func)(void *info);
        void *info;
        int wait;

        /* release the 'pointer lock' */
        data = (struct call_data_struct *)call_data;
        func = data->func;
        info = data->info;
        wait = data->wait;

        mb();
        atomic_inc(&data->started);
        /* At this point the structure may be gone unless wait is true. */
        (*func)(info);

        /* Notify the sending CPU that the task is done. */
        mb();
        if (wait)
                atomic_inc(&data->finished);
}
static void
stop_this_cpu(void)
{
        /*
         * Remove this CPU:
         */
        cpu_clear(smp_processor_id(), cpu_online_map);
        max_xtp();
        local_irq_disable();
        cpu_halt();
}

void
cpu_die(void)
{
        max_xtp();
        local_irq_disable();
        cpu_halt();
        /* Should never be here */
        BUG();
        for (;;);
}
irqreturn_t
handle_IPI (int irq, void *dev_id)
{
        int this_cpu = get_cpu();
        unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
        unsigned long ops;

        mb();	/* Order interrupt and bit testing. */
        while ((ops = xchg(pending_ipis, 0)) != 0) {
                mb();	/* Order bit clearing and data access. */
                do {
                        unsigned long which;

                        which = ffz(~ops);
                        ops &= ~(1 << which);

                        switch (which) {
                        case IPI_CALL_FUNC:
                                handle_call_data();
                                break;

                        case IPI_CPU_STOP:
                                stop_this_cpu();
                                break;
#ifdef CONFIG_KEXEC
                        case IPI_KDUMP_CPU_STOP:
                                unw_init_running(kdump_cpu_freeze, NULL);
                                break;
#endif
                        default:
                                printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n",
                                       this_cpu, which);
                                break;
                        }
                } while (ops);
                mb();	/* Order data access and bit testing. */
        }
        put_cpu();
        return IRQ_HANDLED;
}
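
/*
 * IPI dispatch works on a per-CPU bitmask: send_IPI_single() below sets the
 * bit for the requested operation in the target CPU's ipi_operation word and
 * then raises the interrupt, while handle_IPI() above grabs and clears the
 * whole word atomically with xchg(), so several operations posted before the
 * target reacts can be serviced by a single interrupt.
 */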
/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_single (int dest_cpu, int op)
{
        set_bit(op, &per_cpu(ipi_operation, dest_cpu));
        platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_allbutself (int op)
{
        unsigned int i;

        for_each_online_cpu(i) {
                if (i != smp_processor_id())
                        send_IPI_single(i, op);
        }
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_mask(cpumask_t mask, int op)
{
        unsigned int cpu;

        for_each_cpu_mask(cpu, mask) {
                send_IPI_single(cpu, op);
        }
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_all (int op)
{
        int i;

        for_each_online_cpu(i) {
                send_IPI_single(i, op);
        }
}

/*
 * Called with preemption disabled.
 */
static inline void
send_IPI_self (int op)
{
        send_IPI_single(smp_processor_id(), op);
}
#ifdef CONFIG_KEXEC
void
kdump_smp_send_stop(void)
{
        send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
}

void
kdump_smp_send_init(void)
{
        unsigned int cpu, self_cpu;

        self_cpu = smp_processor_id();
        for_each_online_cpu(cpu) {
                if (cpu != self_cpu) {
                        if (kdump_status[cpu] == 0)
                                platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
                }
        }
}
#endif
/*
 * Called with preemption disabled.
 */
void
smp_send_reschedule (int cpu)
{
        platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
}

/*
 * Called with preemption disabled.
 */
static void
smp_send_local_flush_tlb (int cpu)
{
        platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0);
}

void
smp_local_flush_tlb(void)
{
        /*
         * Use atomic ops. Otherwise, the load/increment/store sequence from
         * a "++" operation can have the line stolen between the load & store.
         * The overhead of the atomic op is negligible in this case & offers
         * significant benefit for the brief periods where lots of cpus
         * are simultaneously flushing TLBs.
         */
        ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq);
        local_flush_tlb_all();
}
#define FLUSH_DELAY	5	/* Usec backoff to eliminate excessive cacheline bouncing */

/*
 * Flush the local TLB on every CPU in @xcpumask and wait until each one has
 * done so, using local_tlb_flush_counts[] as a per-CPU completion counter.
 */
void
smp_flush_tlb_cpumask(cpumask_t xcpumask)
{
        unsigned int *counts = __ia64_per_cpu_var(shadow_flush_counts);
        cpumask_t cpumask = xcpumask;
        int mycpu, cpu, flush_mycpu = 0;

        preempt_disable();
        mycpu = smp_processor_id();

        for_each_cpu_mask(cpu, cpumask)
                counts[cpu] = local_tlb_flush_counts[cpu].count;

        mb();
        for_each_cpu_mask(cpu, cpumask) {
                if (cpu == mycpu)
                        flush_mycpu = 1;
                else
                        smp_send_local_flush_tlb(cpu);
        }

        if (flush_mycpu)
                smp_local_flush_tlb();

        for_each_cpu_mask(cpu, cpumask)
                while (counts[cpu] == local_tlb_flush_counts[cpu].count)
                        udelay(FLUSH_DELAY);

        preempt_enable();
}
void
smp_flush_tlb_all (void)
{
        on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
}

void
smp_flush_tlb_mm (struct mm_struct *mm)
{
        preempt_disable();
        /* this happens for the common case of a single-threaded fork(): */
        if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1)) {
                local_finish_flush_tlb_mm(mm);
                preempt_enable();
                return;
        }

        preempt_enable();
        /*
         * We could optimize this further by using mm->cpu_vm_mask to track which CPUs
         * have been running in the address space. It's not clear that this is worth the
         * trouble though: to avoid races, we have to raise the IPI on the target CPU
         * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is
         * rather trivial.
         */
        on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
}
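
/*
 * Illustrative only (not compiled): a minimal sketch of the cpu_vm_mask
 * optimization described in the comment above, assuming mm->cpu_vm_mask
 * reliably names every CPU that has run in this address space. The function
 * name is hypothetical. As noted above, a CPU could start using the mm after
 * the mask is sampled, which is part of why the simple broadcast is used
 * instead.
 */
#if 0
static void
smp_flush_tlb_mm_targeted(struct mm_struct *mm)
{
        cpumask_t mask = mm->cpu_vm_mask;
        int me = get_cpu();

        cpu_clear(me, mask);
        /* Interrupt only the CPUs that have actually used this mm. */
        smp_call_function_mask(mask, (void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
        local_finish_flush_tlb_mm(mm);
        put_cpu();
}
#endif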
/*
 * Run a function on a specific CPU
 *  <func>	The function to run. This must be fast and non-blocking.
 *  <info>	An arbitrary pointer to pass to the function.
 *  <nonatomic>	Currently unused.
 *  <wait>	If true, wait until function has completed on other CPUs.
 *  [RETURNS]	0 on success, else a negative status code.
 *
 * Does not return until the remote CPU is nearly ready to execute <func>,
 * or has already begun (or finished) executing it.
 */
int
smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
                          int wait)
{
        struct call_data_struct data;
        int cpus = 1;
        int me = get_cpu();	/* prevent preemption and reschedule on another processor */

        if (cpuid == me) {
                local_irq_disable();
                func(info);
                local_irq_enable();
                put_cpu();
                return 0;
        }

        data.func = func;
        data.info = info;
        atomic_set(&data.started, 0);
        data.wait = wait;
        if (wait)
                atomic_set(&data.finished, 0);

        spin_lock_bh(&call_lock);

        call_data = &data;
        mb();	/* ensure store to call_data precedes setting of IPI_CALL_FUNC */
        send_IPI_single(cpuid, IPI_CALL_FUNC);

        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
                cpu_relax();

        if (wait)
                while (atomic_read(&data.finished) != cpus)
                        cpu_relax();

        call_data = NULL;
        spin_unlock_bh(&call_lock);
        put_cpu();
        return 0;
}
EXPORT_SYMBOL(smp_call_function_single);
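
/*
 * Illustrative usage only (not compiled): run a fast, non-blocking function
 * on one particular CPU and wait for it to finish. The helpers below
 * (remote_hello, poke_cpu) are hypothetical and exist only for this sketch.
 */
#if 0
static void
remote_hello(void *info)
{
        /* Runs on the target CPU in interrupt context; must not sleep. */
        printk(KERN_INFO "hello from CPU %d\n", smp_processor_id());
}

static void
poke_cpu(int cpu)
{
        /* wait=1: spin until remote_hello() has finished on 'cpu'. */
        smp_call_function_single(cpu, remote_hello, NULL, 0, 1);
}
#endif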
/**
 * smp_call_function_mask(): Run a function on a set of other CPUs.
 *  <mask>	The set of cpus to run on. Must not include the current cpu.
 *  <func>	The function to run. This must be fast and non-blocking.
 *  <info>	An arbitrary pointer to pass to the function.
 *  <wait>	If true, wait (atomically) until function
 *		has completed on other CPUs.
 *
 * Returns 0 on success, else a negative status code.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int smp_call_function_mask(cpumask_t mask,
                           void (*func)(void *), void *info,
                           int wait)
{
        struct call_data_struct data;
        cpumask_t allbutself;
        int cpus;

        spin_lock(&call_lock);
        allbutself = cpu_online_map;
        cpu_clear(smp_processor_id(), allbutself);

        cpus_and(mask, mask, allbutself);
        cpus = cpus_weight(mask);

        if (!cpus) {
                spin_unlock(&call_lock);
                return 0;
        }

        /* Can deadlock when called with interrupts disabled */
        WARN_ON(irqs_disabled());

        data.func = func;
        data.info = info;
        atomic_set(&data.started, 0);
        data.wait = wait;
        if (wait)
                atomic_set(&data.finished, 0);

        call_data = &data;
        mb();	/* ensure store to call_data precedes setting of IPI_CALL_FUNC */

        /* Send a message to other CPUs */
        if (cpus_equal(mask, allbutself))
                send_IPI_allbutself(IPI_CALL_FUNC);
        else
                send_IPI_mask(mask, IPI_CALL_FUNC);

        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
                cpu_relax();

        if (wait)
                while (atomic_read(&data.finished) != cpus)
                        cpu_relax();

        call_data = NULL;
        spin_unlock(&call_lock);
        return 0;
}
EXPORT_SYMBOL(smp_call_function_mask);
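
/*
 * Illustrative usage only (not compiled): run a fast, non-blocking function
 * on a chosen subset of CPUs and on the local CPU. The helpers below
 * (drain_local_queue, drain_queues_on) are hypothetical and exist only for
 * this sketch.
 */
#if 0
static void
drain_local_queue(void *unused)
{
        /* Fast, non-blocking per-CPU work goes here. */
}

static void
drain_queues_on(cpumask_t mask)
{
        int me = get_cpu();

        /* The mask passed to smp_call_function_mask() must exclude this CPU. */
        cpu_clear(me, mask);
        smp_call_function_mask(mask, drain_local_queue, NULL, 1);	/* wait=1 */
        drain_local_queue(NULL);	/* cover the local CPU ourselves */
        put_cpu();
}
#endif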
/*
 * this function sends a 'generic call function' IPI to all other CPUs
 * in the system.
 */

/*
 * [SUMMARY]	Run a function on all other CPUs.
 *  <func>	The function to run. This must be fast and non-blocking.
 *  <info>	An arbitrary pointer to pass to the function.
 *  <nonatomic>	currently unused.
 *  <wait>	If true, wait (atomically) until function has completed on other CPUs.
 *  [RETURNS]	0 on success, else a negative status code.
 *
 * Does not return until the remote CPUs are nearly ready to execute <func>,
 * or have already begun (or finished) executing it.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int
smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
{
        struct call_data_struct data;
        int cpus;

        spin_lock(&call_lock);
        cpus = num_online_cpus() - 1;
        if (!cpus) {
                spin_unlock(&call_lock);
                return 0;
        }

        /* Can deadlock when called with interrupts disabled */
        WARN_ON(irqs_disabled());

        data.func = func;
        data.info = info;
        atomic_set(&data.started, 0);
        data.wait = wait;
        if (wait)
                atomic_set(&data.finished, 0);

        call_data = &data;
        mb();	/* ensure store to call_data precedes setting of IPI_CALL_FUNC */
        send_IPI_allbutself(IPI_CALL_FUNC);

        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
                cpu_relax();

        if (wait)
                while (atomic_read(&data.finished) != cpus)
                        cpu_relax();

        call_data = NULL;
        spin_unlock(&call_lock);
        return 0;
}
EXPORT_SYMBOL(smp_call_function);
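
/*
 * Illustrative usage only (not compiled): broadcast a fast, non-blocking
 * function to every other online CPU and count how many ran it. The helpers
 * below (bump_counter, count_other_cpus) are hypothetical and exist only for
 * this sketch.
 */
#if 0
static void
bump_counter(void *counter)
{
        /* Runs in interrupt context on every other online CPU. */
        atomic_inc((atomic_t *)counter);
}

static int
count_other_cpus(void)
{
        atomic_t seen = ATOMIC_INIT(0);

        /* wait=1: every other online CPU has run bump_counter() on return. */
        smp_call_function(bump_counter, &seen, 0, 1);
        return atomic_read(&seen);
}
#endif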
/*
 * this function calls the 'stop' function on all other CPUs in the system.
 */
void
smp_send_stop (void)
{
        send_IPI_allbutself(IPI_CPU_STOP);
}

int
setup_profiling_timer (unsigned int multiplier)
{
        return -EINVAL;
}