smpboot_64.c

/*
 *  x86 SMP booting functions
 *
 *  (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *  (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *  Copyright 2001 Andi Kleen, SuSE Labs.
 *
 *  Much of the core SMP work is based on previous work by Thomas Radke, to
 *  whom a great many thanks are extended.
 *
 *  Thanks to Intel for making available several different Pentium,
 *  Pentium Pro and Pentium-II/Xeon MP machines.
 *  Original development of Linux SMP code supported by Caldera.
 *
 *  This code is released under the GNU General Public License version 2
 *
 *  Fixes
 *      Felix Koop          : NR_CPUS used properly
 *      Jose Renau          : Handle single CPU case.
 *      Alan Cox            : By repeated request 8) - Total BogoMIP report.
 *      Greg Wright         : Fix for kernel stacks panic.
 *      Erich Boleyn        : MP v1.4 and additional changes.
 *      Matthias Sattler    : Changes for 2.1 kernel map.
 *      Michel Lespinasse   : Changes for 2.1 kernel map.
 *      Michael Chastain    : Change trampoline.S to gnu as.
 *      Alan Cox            : Dumb bug: 'B' step PPro's are fine
 *      Ingo Molnar         : Added APIC timers, based on code
 *                            from Jose Renau
 *      Ingo Molnar         : various cleanups and rewrites
 *      Tigran Aivazian     : fixed "0.00 in /proc/uptime on SMP" bug.
 *      Maciej W. Rozycki   : Bits for genuine 82489DX APICs
 *      Andi Kleen          : Changed for SMP boot into long mode.
 *      Rusty Russell       : Hacked into shape for new "hotplug" boot process.
 *      Andi Kleen          : Converted to new state machine.
 *                            Various cleanups.
 *                            Probably mostly hotplug CPU ready now.
 *      Ashok Raj           : CPU hotplug support
 */
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/bootmem.h>
#include <linux/thread_info.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <linux/smp.h>
#include <linux/kdebug.h>

#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/desc.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/numa.h>
/* Set when the idlers are all forked */
int smp_threads_ready;

/*
 * Trampoline 80x86 program as an array.
 */
extern const unsigned char trampoline_data[];
extern const unsigned char trampoline_end[];

/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };

/*
 * Store all idle threads, this can be reused instead of creating
 * a new thread. Also avoids complicated thread destroy functionality
 * for idle threads.
 */
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
 * removed after init for !CONFIG_HOTPLUG_CPU.
 */
static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
#define get_idle_for_cpu(x)     (per_cpu(idle_thread_array, x))
#define set_idle_for_cpu(x, p)  (per_cpu(idle_thread_array, x) = (p))
#else
struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata;
#define get_idle_for_cpu(x)     (idle_thread_array[(x)])
#define set_idle_for_cpu(x, p)  (idle_thread_array[(x)] = (p))
#endif
/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */
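/*
 * The trampoline page sits in low physical memory (below 1 MB): a freshly
 * started AP begins executing in real mode and can only reach the first
 * megabyte, and the trampoline then brings it up into protected and long
 * mode before it enters the kernel proper.
 */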
static unsigned long __cpuinit setup_trampoline(void)
{
        void *tramp = __va(SMP_TRAMPOLINE_BASE);
        memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
        return virt_to_phys(tramp);
}

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */
static void __cpuinit smp_store_cpu_info(int id)
{
        struct cpuinfo_x86 *c = &cpu_data(id);

        *c = boot_cpu_data;
        c->cpu_index = id;
        identify_cpu(c);
        print_cpu_info(c);
}
static atomic_t init_deasserted __cpuinitdata;

/*
 * Report back to the Boot Processor.
 * Running on AP.
 */
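/*
 * Handshake with the BSP: do_boot_cpu() sets our bit in cpu_callout_map
 * once it has sent the INIT/STARTUP sequence, and we acknowledge by
 * setting our bit in cpu_callin_map when our own setup is done.
 */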
void __cpuinit smp_callin(void)
{
        int cpuid, phys_id;
        unsigned long timeout;

        /*
         * If woken up by an INIT in an 82489DX configuration
         * we may get here before an INIT-deassert IPI reaches
         * our local APIC. We have to wait for the IPI or we'll
         * lock up on an APIC access.
         */
        while (!atomic_read(&init_deasserted))
                cpu_relax();

        /*
         * (This works even if the APIC is not enabled.)
         */
        phys_id = GET_APIC_ID(apic_read(APIC_ID));
        cpuid = smp_processor_id();
        if (cpu_isset(cpuid, cpu_callin_map)) {
                panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
                      phys_id, cpuid);
        }
        Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

        /*
         * STARTUP IPIs are fragile beasts as they might sometimes
         * trigger some glue motherboard logic. Complete APIC bus
         * silence for 1 second, this overestimates the time the
         * boot CPU is spending to send the up to 2 STARTUP IPIs
         * by a factor of two. This should be enough.
         */

        /*
         * Waiting 2s total for startup (udelay is not yet working)
         */
        timeout = jiffies + 2*HZ;
        while (time_before(jiffies, timeout)) {
                /*
                 * Has the boot CPU finished its STARTUP sequence?
                 */
                if (cpu_isset(cpuid, cpu_callout_map))
                        break;
                cpu_relax();
        }

        if (!time_before(jiffies, timeout)) {
                panic("smp_callin: CPU%d started up but did not get a callout!\n",
                      cpuid);
        }

        /*
         * the boot CPU has finished the init stage and is spinning
         * on callin_map until we finish. We are free to set up this
         * CPU, first the APIC. (this is probably redundant on most
         * boards)
         */
        Dprintk("CALLIN, before setup_local_APIC().\n");
        setup_local_APIC();
        end_local_APIC_setup();

        /*
         * Get our bogomips.
         *
         * Need to enable IRQs because it can take longer and then
         * the NMI watchdog might kill us.
         */
        local_irq_enable();
        calibrate_delay();
        local_irq_disable();
        Dprintk("Stack at about %p\n", &cpuid);

        /*
         * Save our processor parameters
         */
        smp_store_cpu_info(cpuid);

        /*
         * Allow the master to continue.
         */
        cpu_set(cpuid, cpu_callin_map);
}
/* maps the cpu to the sched domain representing multi-core */
cpumask_t cpu_coregroup_map(int cpu)
{
        struct cpuinfo_x86 *c = &cpu_data(cpu);
        /*
         * For perf, we return last level cache shared map.
         * And for power savings, we return cpu_core_map
         */
        if (sched_mc_power_savings || sched_smt_power_savings)
                return per_cpu(cpu_core_map, cpu);
        else
                return c->llc_shared_map;
}

/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;
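/*
 * Fill in the topology maps for a newly booted CPU:
 *   - cpu_sibling_map: hardware threads (SMT siblings) sharing a core
 *   - cpu_core_map:    cores sharing a physical package
 *   - llc_shared_map:  CPUs sharing the last-level cache
 * booted_cores counts how many distinct cores of the package are online.
 */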
static inline void set_cpu_sibling_map(int cpu)
{
        int i;
        struct cpuinfo_x86 *c = &cpu_data(cpu);

        cpu_set(cpu, cpu_sibling_setup_map);

        if (smp_num_siblings > 1) {
                for_each_cpu_mask(i, cpu_sibling_setup_map) {
                        if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
                            c->cpu_core_id == cpu_data(i).cpu_core_id) {
                                cpu_set(i, per_cpu(cpu_sibling_map, cpu));
                                cpu_set(cpu, per_cpu(cpu_sibling_map, i));
                                cpu_set(i, per_cpu(cpu_core_map, cpu));
                                cpu_set(cpu, per_cpu(cpu_core_map, i));
                                cpu_set(i, c->llc_shared_map);
                                cpu_set(cpu, cpu_data(i).llc_shared_map);
                        }
                }
        } else {
                cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
        }

        cpu_set(cpu, c->llc_shared_map);

        if (current_cpu_data.x86_max_cores == 1) {
                per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
                c->booted_cores = 1;
                return;
        }

        for_each_cpu_mask(i, cpu_sibling_setup_map) {
                if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
                    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
                        cpu_set(i, c->llc_shared_map);
                        cpu_set(cpu, cpu_data(i).llc_shared_map);
                }
                if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
                        cpu_set(i, per_cpu(cpu_core_map, cpu));
                        cpu_set(cpu, per_cpu(cpu_core_map, i));
                        /*
                         * Does this new cpu bringup a new core?
                         */
                        if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
                                /*
                                 * for each core in package, increment
                                 * the booted_cores for this new cpu
                                 */
                                if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
                                        c->booted_cores++;
                                /*
                                 * increment the core count for all
                                 * the other cpus in this package
                                 */
                                if (i != cpu)
                                        cpu_data(i).booted_cores++;
                        } else if (i != cpu && !c->booted_cores)
                                c->booted_cores = cpu_data(i).booted_cores;
                }
        }
}
/*
 * Setup code on secondary processor (after coming out of the trampoline)
 */
void __cpuinit start_secondary(void)
{
        /*
         * Don't put anything before smp_callin(); SMP booting is so
         * fragile that we want to limit the things done here to the
         * most necessary things.
         */
        cpu_init();
        preempt_disable();
        smp_callin();

        /* otherwise gcc will move up the smp_processor_id before the cpu_init */
        barrier();

        /*
         * Check TSC sync first:
         */
        check_tsc_sync_target();

        if (nmi_watchdog == NMI_IO_APIC) {
                disable_8259A_irq(0);
                enable_NMI_through_LVT0();
                enable_8259A_irq(0);
        }

        /*
         * The sibling maps must be set before turning the online map on for
         * this cpu
         */
        set_cpu_sibling_map(smp_processor_id());

        /*
         * We need to hold call_lock, so there is no inconsistency
         * between the time smp_call_function() determines number of
         * IPI recipients, and the time when the determination is made
         * for which cpus receive the IPI in genapic_flat.c. Holding this
         * lock helps us to not include this cpu in a currently in progress
         * smp_call_function().
         */
        lock_ipi_call_lock();
        spin_lock(&vector_lock);

        /* Setup the per cpu irq handling data structures */
        __setup_vector_irq(smp_processor_id());
        /*
         * Allow the master to continue.
         */
        cpu_set(smp_processor_id(), cpu_online_map);
        per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
        spin_unlock(&vector_lock);
        unlock_ipi_call_lock();

        setup_secondary_clock();

        cpu_idle();
}
extern volatile unsigned long init_rsp;
extern void (*initial_code)(void);

#ifdef APIC_DEBUG
static void inquire_remote_apic(int apicid)
{
        unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
        char *names[] = { "ID", "VERSION", "SPIV" };
        int timeout;
        u32 status;

        printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);

        for (i = 0; i < ARRAY_SIZE(regs); i++) {
                printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]);

                /*
                 * Wait for idle.
                 */
                status = safe_apic_wait_icr_idle();
                if (status)
                        printk(KERN_CONT
                               "a previous APIC delivery may have failed\n");

                apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
                apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);

                timeout = 0;
                do {
                        udelay(100);
                        status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
                } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

                switch (status) {
                case APIC_ICR_RR_VALID:
                        status = apic_read(APIC_RRR);
                        printk(KERN_CONT "%08x\n", status);
                        break;
                default:
                        printk(KERN_CONT "failed\n");
                }
        }
}
#endif
/*
 * Kick the secondary to wake up.
 */
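/*
 * This follows the MP-spec "universal start-up algorithm": assert INIT on
 * the target, deassert it, then send up to two STARTUP IPIs pointing at the
 * trampoline. The INIT assert/deassert pair is what old 82489DX APICs need
 * (they do not understand STARTUP IPIs), which is also why smp_callin()
 * waits for init_deasserted before touching its local APIC.
 */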
static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
{
        unsigned long send_status, accept_status = 0;
        int maxlvt, num_starts, j;

        Dprintk("Asserting INIT.\n");

        /*
         * Turn INIT on target chip
         */
        apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

        /*
         * Send IPI
         */
        apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
                                | APIC_DM_INIT);

        Dprintk("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();

        mdelay(10);

        Dprintk("Deasserting INIT.\n");

        /* Target chip */
        apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

        /* Send IPI */
        apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

        Dprintk("Waiting for send to finish...\n");
        send_status = safe_apic_wait_icr_idle();

        mb();
        atomic_set(&init_deasserted, 1);

        num_starts = 2;

        /*
         * Run STARTUP IPI loop.
         */
        Dprintk("#startup loops: %d.\n", num_starts);

        maxlvt = lapic_get_maxlvt();

        for (j = 1; j <= num_starts; j++) {
                Dprintk("Sending STARTUP #%d.\n", j);
                apic_write(APIC_ESR, 0);
                apic_read(APIC_ESR);
                Dprintk("After apic_write.\n");

                /*
                 * STARTUP IPI
                 */

                /* Target chip */
                apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

                /* Boot on the stack */
                /* Kick the second */
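                /*
                 * The vector field of a STARTUP IPI is the physical page
                 * number (address >> 12) at which the target starts executing
                 * in real mode, so start_rip must be page aligned and below
                 * 1 MB.
                 */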
                apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12));

                /*
                 * Give the other CPU some time to accept the IPI.
                 */
                udelay(300);

                Dprintk("Startup point 1.\n");

                Dprintk("Waiting for send to finish...\n");
                send_status = safe_apic_wait_icr_idle();

                /*
                 * Give the other CPU some time to accept the IPI.
                 */
                udelay(200);
                /*
                 * Due to the Pentium erratum 3AP.
                 */
                if (maxlvt > 3) {
                        apic_write(APIC_ESR, 0);
                }
                accept_status = (apic_read(APIC_ESR) & 0xEF);
                if (send_status || accept_status)
                        break;
        }
        Dprintk("After Startup.\n");

        if (send_status)
                printk(KERN_ERR "APIC never delivered???\n");
        if (accept_status)
                printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);

        return (send_status | accept_status);
}
struct create_idle {
        struct work_struct work;
        struct task_struct *idle;
        struct completion done;
        int cpu;
};

static void __cpuinit do_fork_idle(struct work_struct *work)
{
        struct create_idle *c_idle =
                container_of(work, struct create_idle, work);

        c_idle->idle = fork_idle(c_idle->cpu);
        complete(&c_idle->done);
}
/*
 * Boot one CPU.
 */
static int __cpuinit do_boot_cpu(int cpu, int apicid)
{
        unsigned long boot_error;
        int timeout;
        unsigned long start_rip;
        struct create_idle c_idle = {
                .cpu = cpu,
                .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
        };
        INIT_WORK(&c_idle.work, do_fork_idle);

        /* allocate memory for gdts of secondary cpus. Hotplug is considered */
        if (!cpu_gdt_descr[cpu].address &&
            !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
                printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
                return -1;
        }

        /* Allocate node local memory for AP pdas */
        if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
                struct x8664_pda *newpda, *pda;
                int node = cpu_to_node(cpu);
                pda = cpu_pda(cpu);
                newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC,
                                      node);
                if (newpda) {
                        memcpy(newpda, pda, sizeof(struct x8664_pda));
                        cpu_pda(cpu) = newpda;
                } else
                        printk(KERN_ERR
                               "Could not allocate node local PDA for CPU %d on node %d\n",
                               cpu, node);
        }

        alternatives_smp_switch(1);

        c_idle.idle = get_idle_for_cpu(cpu);

        if (c_idle.idle) {
                c_idle.idle->thread.sp = (unsigned long)(((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1);
                init_idle(c_idle.idle, cpu);
                goto do_rest;
        }

        /*
         * During cold boot process, keventd thread is not spun up yet.
         * When we do cpu hot-add, we create idle threads on the fly, we should
         * not acquire any attributes from the calling context. Hence the clean
         * way to create kernel_threads() is to do that from keventd().
         * We do the current_is_keventd() due to the fact that ACPI notifier
         * was also queuing to keventd() and when the caller is already running
         * in context of keventd(), we would end up with locking up the keventd
         * thread.
         */
        if (!keventd_up() || current_is_keventd())
                c_idle.work.func(&c_idle.work);
        else {
                schedule_work(&c_idle.work);
                wait_for_completion(&c_idle.done);
        }

        if (IS_ERR(c_idle.idle)) {
                printk("failed fork for CPU %d\n", cpu);
                return PTR_ERR(c_idle.idle);
        }

        set_idle_for_cpu(cpu, c_idle.idle);

do_rest:
        cpu_pda(cpu)->pcurrent = c_idle.idle;

        start_rip = setup_trampoline();
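
        /*
         * Hand the AP its kernel stack and entry point: the 64-bit startup
         * code (head_64.S) loads init_rsp as the initial stack pointer and
         * jumps through initial_code once the trampoline has brought the AP
         * into long mode.
         */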
        init_rsp = c_idle.idle->thread.sp;
        load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread);
        initial_code = start_secondary;
        clear_tsk_thread_flag(c_idle.idle, TIF_FORK);

        printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu,
               cpus_weight(cpu_present_map), apicid);

        /*
         * This grunge runs the startup process for
         * the targeted processor.
         */

        atomic_set(&init_deasserted, 0);

        Dprintk("Setting warm reset code and vector.\n");
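        /*
         * CMOS register 0x0F set to 0x0A selects a "warm reset": coming out
         * of reset the BIOS skips POST and jumps through the warm-reset
         * vector at 0x40:0x67. The writes below store the trampoline's
         * real-mode segment (at 0x469) and offset (at 0x467) there.
         */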
        CMOS_WRITE(0xa, 0xf);
        local_flush_tlb();
        Dprintk("1.\n");
        *((volatile unsigned short *)phys_to_virt(0x469)) = start_rip >> 4;
        Dprintk("2.\n");
        *((volatile unsigned short *)phys_to_virt(0x467)) = start_rip & 0xf;
        Dprintk("3.\n");

        /*
         * Be paranoid about clearing APIC errors.
         */
        apic_write(APIC_ESR, 0);
        apic_read(APIC_ESR);

        /*
         * Status is now clean
         */
        boot_error = 0;

        /*
         * Starting actual IPI sequence...
         */
        boot_error = wakeup_secondary_via_INIT(apicid, start_rip);

        if (!boot_error) {
                /*
                 * allow APs to start initializing.
                 */
                Dprintk("Before Callout %d.\n", cpu);
                cpu_set(cpu, cpu_callout_map);
                Dprintk("After Callout %d.\n", cpu);

                /*
                 * Wait 5s total for a response
                 */
                for (timeout = 0; timeout < 50000; timeout++) {
                        if (cpu_isset(cpu, cpu_callin_map))
                                break;  /* It has booted */
                        udelay(100);
                }

                if (cpu_isset(cpu, cpu_callin_map)) {
                        /* number CPUs logically, starting from 1 (BSP is 0) */
                        Dprintk("CPU has booted.\n");
                } else {
                        boot_error = 1;
                        if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
                                        == 0xA5)
                                /* trampoline started but...? */
                                printk("Stuck ??\n");
                        else
                                /* trampoline code not run */
                                printk("Not responding.\n");
#ifdef APIC_DEBUG
                        inquire_remote_apic(apicid);
#endif
                }
        }
        if (boot_error) {
                cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
                clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */
                clear_node_cpumask(cpu); /* was set by numa_add_cpu */
                cpu_clear(cpu, cpu_present_map);
                cpu_clear(cpu, cpu_possible_map);
                per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
                return -EIO;
        }

        return 0;
}
cycles_t cacheflush_time;
unsigned long cache_decay_ticks;

/*
 * Cleanup possible dangling ends...
 */
static __cpuinit void smp_cleanup_boot(void)
{
        /*
         * Paranoid: Set warm reset code and vector here back
         * to default values.
         */
        CMOS_WRITE(0, 0xf);

        /*
         * Reset trampoline flag
         */
        *((volatile int *)phys_to_virt(0x467)) = 0;
}
/*
 * Fall back to non SMP mode after errors.
 *
 * RED-PEN audit/test this more. I bet there is more state messed up here.
 */
static __init void disable_smp(void)
{
        cpu_present_map = cpumask_of_cpu(0);
        cpu_possible_map = cpumask_of_cpu(0);
        if (smp_found_config)
                phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
        else
                phys_cpu_present_map = physid_mask_of_physid(0);
        cpu_set(0, per_cpu(cpu_sibling_map, 0));
        cpu_set(0, per_cpu(cpu_core_map, 0));
}
/*
 * Various sanity checks.
 */
static int __init smp_sanity_check(unsigned max_cpus)
{
        if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
                printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
                       hard_smp_processor_id());
                physid_set(hard_smp_processor_id(), phys_cpu_present_map);
        }

        /*
         * If we couldn't find an SMP configuration at boot time,
         * get out of here now!
         */
        if (!smp_found_config) {
                printk(KERN_NOTICE "SMP motherboard not detected.\n");
                disable_smp();
                if (APIC_init_uniprocessor())
                        printk(KERN_NOTICE "Local APIC not detected."
                               " Using dummy APIC emulation.\n");
                return -1;
        }

        /*
         * Should not be necessary because the MP table should list the boot
         * CPU too, but we do it for the sake of robustness anyway.
         */
        if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
                printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
                       boot_cpu_id);
                physid_set(hard_smp_processor_id(), phys_cpu_present_map);
        }

        /*
         * If we couldn't find a local APIC, then get out of here now!
         */
        if (!cpu_has_apic) {
                printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
                       boot_cpu_id);
                printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
                nr_ioapics = 0;
                return -1;
        }

        /*
         * If SMP should be disabled, then really disable it!
         */
        if (!max_cpus) {
                printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
                nr_ioapics = 0;
                return -1;
        }

        return 0;
}
static void __init smp_cpu_index_default(void)
{
        int i;
        struct cpuinfo_x86 *c;

        for_each_cpu_mask(i, cpu_possible_map) {
                c = &cpu_data(i);
                /* mark all to hotplug */
                c->cpu_index = NR_CPUS;
        }
}
/*
 * Prepare for SMP bootup. The MP table or ACPI has been read
 * earlier. Just do some sanity checking here and enable APIC mode.
 */
void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
        nmi_watchdog_default();
        smp_cpu_index_default();
        current_cpu_data = boot_cpu_data;
        current_thread_info()->cpu = 0;  /* needed? */
        set_cpu_sibling_map(0);

        if (smp_sanity_check(max_cpus) < 0) {
                printk(KERN_INFO "SMP disabled\n");
                disable_smp();
                return;
        }

        /*
         * Switch from PIC to APIC mode.
         */
        setup_local_APIC();

        /*
         * Enable IO APIC before setting up error vector
         */
        if (!skip_ioapic_setup && nr_ioapics)
                enable_IO_APIC();
        end_local_APIC_setup();

        if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
                      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
                /* Or can we switch back to PIC here? */
        }

        /*
         * Now start the IO-APICs
         */
        if (!skip_ioapic_setup && nr_ioapics)
                setup_IO_APIC();
        else
                nr_ioapics = 0;

        /*
         * Set up local APIC timer on boot CPU.
         */
        setup_boot_clock();
}
/*
 * Early setup to make printk work.
 */
void __init native_smp_prepare_boot_cpu(void)
{
        int me = smp_processor_id();
        /* already set me in cpu_online_map in boot_cpu_init() */
        cpu_set(me, cpu_callout_map);
        per_cpu(cpu_state, me) = CPU_ONLINE;
}
/*
 * Entry point to boot a CPU.
 */
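/*
 * Runs on the BSP: kicks the target with do_boot_cpu(), checks TSC sync
 * against it, and then waits until the AP has marked itself in
 * cpu_online_map before returning.
 */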
int __cpuinit native_cpu_up(unsigned int cpu)
{
        int apicid = cpu_present_to_apicid(cpu);
        unsigned long flags;
        int err;

        WARN_ON(irqs_disabled());

        Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);

        if (apicid == BAD_APICID || apicid == boot_cpu_id ||
            !physid_isset(apicid, phys_cpu_present_map)) {
                printk("__cpu_up: bad cpu %d\n", cpu);
                return -EINVAL;
        }

        /*
         * Already booted CPU?
         */
        if (cpu_isset(cpu, cpu_callin_map)) {
                Dprintk("do_boot_cpu %d Already started\n", cpu);
                return -ENOSYS;
        }

        /*
         * Save current MTRR state in case it was changed since early boot
         * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
         */
        mtrr_save_state();

        per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;

        /* Boot it! */
        err = do_boot_cpu(cpu, apicid);
        if (err < 0) {
                Dprintk("do_boot_cpu failed %d\n", err);
                return err;
        }

        /* Unleash the CPU! */
        Dprintk("waiting for cpu %d\n", cpu);

        /*
         * Make sure and check TSC sync:
         */
        local_irq_save(flags);
        check_tsc_sync_source(cpu);
        local_irq_restore(flags);

        while (!cpu_isset(cpu, cpu_online_map))
                cpu_relax();
        err = 0;

        return err;
}
/*
 * Finish the SMP boot.
 */
void __init native_smp_cpus_done(unsigned int max_cpus)
{
        smp_cleanup_boot();
        setup_ioapic_dest();
        check_nmi_watchdog();
}
#ifdef CONFIG_HOTPLUG_CPU

static void remove_siblinginfo(int cpu)
{
        int sibling;
        struct cpuinfo_x86 *c = &cpu_data(cpu);

        for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
                cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
                /*
                 * last thread sibling in this cpu core going down
                 */
                if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
                        cpu_data(sibling).booted_cores--;
        }

        for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
                cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
        cpus_clear(per_cpu(cpu_sibling_map, cpu));
        cpus_clear(per_cpu(cpu_core_map, cpu));
        c->phys_proc_id = 0;
        c->cpu_core_id = 0;
        cpu_clear(cpu, cpu_sibling_setup_map);
}
static void __ref remove_cpu_from_maps(void)
{
        int cpu = smp_processor_id();

        cpu_clear(cpu, cpu_callout_map);
        cpu_clear(cpu, cpu_callin_map);
        clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */
        clear_node_cpumask(cpu);
}
int __cpu_disable(void)
{
        int cpu = smp_processor_id();

        /*
         * Perhaps use cpufreq to drop frequency, but that could go
         * into generic code.
         *
         * We won't take down the boot processor on i386 due to some
         * interrupts only being able to be serviced by the BSP.
         * Especially so if we're not using an IOAPIC   -zwane
         */
        if (cpu == 0)
                return -EBUSY;

        if (nmi_watchdog == NMI_LOCAL_APIC)
                stop_apic_nmi_watchdog(NULL);
        clear_local_APIC();

        /*
         * HACK:
         * Allow any queued timer interrupts to get serviced
         * This is only a temporary solution until we cleanup
         * fixup_irqs as we do for IA64.
         */
        local_irq_enable();
        mdelay(1);

        local_irq_disable();
        remove_siblinginfo(cpu);

        spin_lock(&vector_lock);
        /* It's now safe to remove this processor from the online map */
        cpu_clear(cpu, cpu_online_map);
        spin_unlock(&vector_lock);
        remove_cpu_from_maps();
        fixup_irqs(cpu_online_map);
        return 0;
}
void __cpu_die(unsigned int cpu)
{
        /* We don't do anything here: idle task is faking death itself. */
        unsigned int i;

        for (i = 0; i < 10; i++) {
                /* They ack this in play_dead by setting CPU_DEAD */
                if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
                        printk("CPU %d is now offline\n", cpu);
                        if (1 == num_online_cpus())
                                alternatives_smp_switch(0);
                        return;
                }
                msleep(100);
        }
        printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}

#else /* ... !CONFIG_HOTPLUG_CPU */

int __cpu_disable(void)
{
        return -ENOSYS;
}

void __cpu_die(unsigned int cpu)
{
        /* We said "no" in __cpu_disable */
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */