cpu.c
/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licensed under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
#include <linux/gfp.h>

#ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);

static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);

/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

static struct {
	struct task_struct *active_writer;
	struct mutex lock; /* Synchronizes accesses to refcount, */
	/*
	 * Also blocks the new readers during
	 * an ongoing cpu hotplug operation.
	 */
	int refcount;
} cpu_hotplug = {
	.active_writer = NULL,
	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
	.refcount = 0,
};
#ifdef CONFIG_HOTPLUG_CPU

void get_online_cpus(void)
{
	might_sleep();
	if (cpu_hotplug.active_writer == current)
		return;
	mutex_lock(&cpu_hotplug.lock);
	cpu_hotplug.refcount++;
	mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(get_online_cpus);

void put_online_cpus(void)
{
	if (cpu_hotplug.active_writer == current)
		return;
	mutex_lock(&cpu_hotplug.lock);
	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
		wake_up_process(cpu_hotplug.active_writer);
	mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(put_online_cpus);

#endif	/* CONFIG_HOTPLUG_CPU */
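/*
 * Usage sketch (illustrative, not part of this file): a reader that needs
 * a stable view of cpu_online_mask brackets its traversal with the
 * refcounted read side above.  process_cpu() is a made-up helper.
 *
 *	get_online_cpus();
 *	for_each_online_cpu(cpu)
 *		process_cpu(cpu);
 *	put_online_cpus();
 */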
/*
 * The following two APIs must be used when attempting
 * to serialize the updates to cpu_online_mask, cpu_present_mask.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock.
 *
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock, finds the refcount
 *   non-zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice, since
 * get_online_cpus() is not an API that is called all that often.
 */
static void cpu_hotplug_begin(void)
{
	cpu_hotplug.active_writer = current;

	for (;;) {
		mutex_lock(&cpu_hotplug.lock);
		if (likely(!cpu_hotplug.refcount))
			break;
		__set_current_state(TASK_UNINTERRUPTIBLE);
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
	}
}

static void cpu_hotplug_done(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
}
/* Need to know about CPUs going up/down? */
int __ref register_cpu_notifier(struct notifier_block *nb)
{
	int ret;
	cpu_maps_update_begin();
	ret = raw_notifier_chain_register(&cpu_chain, nb);
	cpu_maps_update_done();
	return ret;
}

#ifdef CONFIG_HOTPLUG_CPU

EXPORT_SYMBOL(register_cpu_notifier);

void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
	cpu_maps_update_begin();
	raw_notifier_chain_unregister(&cpu_chain, nb);
	cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);
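/*
 * Usage sketch (illustrative; my_cpu_callback, my_cpu_notifier and the
 * two per-cpu helpers are made-up names): a subsystem that reacts to
 * CPUs coming and going.  Masking off CPU_TASKS_FROZEN lets one callback
 * handle both the normal and the suspend/resume variants of each event.
 *
 *	static int my_cpu_callback(struct notifier_block *nb,
 *				   unsigned long action, void *hcpu)
 *	{
 *		unsigned int cpu = (unsigned long)hcpu;
 *
 *		switch (action & ~CPU_TASKS_FROZEN) {
 *		case CPU_ONLINE:
 *			setup_per_cpu_state(cpu);
 *			break;
 *		case CPU_DEAD:
 *			teardown_per_cpu_state(cpu);
 *			break;
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_cpu_notifier = {
 *		.notifier_call = my_cpu_callback,
 *	};
 *
 *	register_cpu_notifier(&my_cpu_notifier);
 */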
static inline void check_for_tasks(int cpu)
{
	struct task_struct *p;

	write_lock_irq(&tasklist_lock);
	for_each_process(p) {
		if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
		    (!cputime_eq(p->utime, cputime_zero) ||
		     !cputime_eq(p->stime, cputime_zero)))
			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
				"(state = %ld, flags = %x)\n",
				p->comm, task_pid_nr(p), cpu,
				p->state, p->flags);
	}
	write_unlock_irq(&tasklist_lock);
}
struct take_cpu_down_param {
	struct task_struct *caller;
	unsigned long mod;
	void *hcpu;
};

/* Take this CPU down. */
static int __ref take_cpu_down(void *_param)
{
	struct take_cpu_down_param *param = _param;
	unsigned int cpu = (unsigned long)param->hcpu;
	int err;

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
				param->hcpu);

	if (task_cpu(param->caller) == cpu)
		move_task_off_dead_cpu(cpu, param->caller);
	/* Force idle task to run as soon as we yield: it should
	   immediately notice cpu is offline and die quickly. */
	sched_idle_next();
	return 0;
}
/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
	int err, nr_calls = 0;
	void *hcpu = (void *)(long)cpu;
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
	struct take_cpu_down_param tcd_param = {
		.caller = current,
		.mod = mod,
		.hcpu = hcpu,
	};

	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_online(cpu))
		return -EINVAL;

	cpu_hotplug_begin();
	set_cpu_active(cpu, false);
	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
					hcpu, -1, &nr_calls);
	if (err == NOTIFY_BAD) {
		set_cpu_active(cpu, true);

		nr_calls--;
		__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					  hcpu, nr_calls, NULL);
		printk("%s: attempt to take down CPU %u failed\n",
		       __func__, cpu);
		err = -EINVAL;
		goto out_release;
	}

	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
	if (err) {
		set_cpu_active(cpu, true);
		/* CPU didn't die: tell everyone.  Can't complain. */
		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();

		goto out_release;
	}
	BUG_ON(cpu_online(cpu));

	/* Wait for it to sleep (leaving idle task). */
	while (!idle_cpu(cpu))
		yield();

	/* This actually kills the CPU. */
	__cpu_die(cpu);

	/* CPU is completely dead: tell everyone.  Too late to complain. */
	if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
				    hcpu) == NOTIFY_BAD)
		BUG();

	check_for_tasks(cpu);

out_release:
	cpu_hotplug_done();
	if (!err) {
		if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();
	}
	return err;
}
int __ref cpu_down(unsigned int cpu)
{
	int err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_down(cpu, 0);

out:
	cpu_maps_update_done();
	return err;
}
EXPORT_SYMBOL(cpu_down);
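/*
 * For orientation: in this era cpu_down() is normally reached from
 * userspace via the sysfs hotplug attribute (drivers/base/cpu.c), e.g.:
 *
 *	# echo 0 > /sys/devices/system/cpu/cpu1/online
 */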
#endif /* CONFIG_HOTPLUG_CPU */
/* Requires cpu_add_remove_lock to be held */
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
{
	int ret, nr_calls = 0;
	void *hcpu = (void *)(long)cpu;
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;

	if (cpu_online(cpu) || !cpu_present(cpu))
		return -EINVAL;

	cpu_hotplug_begin();
	ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
					-1, &nr_calls);
	if (ret == NOTIFY_BAD) {
		nr_calls--;
		printk("%s: attempt to bring up CPU %u failed\n",
		       __func__, cpu);
		ret = -EINVAL;
		goto out_notify;
	}

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu);
	if (ret != 0)
		goto out_notify;
	BUG_ON(!cpu_online(cpu));

	set_cpu_active(cpu, true);

	/* Now call notifier in preparation. */
	raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);

out_notify:
	if (ret != 0)
		__raw_notifier_call_chain(&cpu_chain,
				CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
	cpu_hotplug_done();

	return ret;
}
int __cpuinit cpu_up(unsigned int cpu)
{
	int err = 0;

#ifdef CONFIG_MEMORY_HOTPLUG
	int nid;
	pg_data_t *pgdat;
#endif

	if (!cpu_possible(cpu)) {
		printk(KERN_ERR "can't online cpu %d because it is not "
			"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64)
		printk(KERN_ERR "please check additional_cpus= boot "
				"parameter\n");
#endif
		return -EINVAL;
	}

#ifdef CONFIG_MEMORY_HOTPLUG
	nid = cpu_to_node(cpu);
	if (!node_online(nid)) {
		err = mem_online_node(nid);
		if (err)
			return err;
	}

	pgdat = NODE_DATA(nid);
	if (!pgdat) {
		printk(KERN_ERR
			"Can't online cpu %d due to NULL pgdat\n", cpu);
		return -ENOMEM;
	}

	if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
		mutex_lock(&zonelists_mutex);
		build_all_zonelists(NULL);
		mutex_unlock(&zonelists_mutex);
	}
#endif

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0);

out:
	cpu_maps_update_done();
	return err;
}
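/*
 * For orientation: cpu_up() has two usual callers.  At boot, smp_init()
 * walks cpu_present_mask and brings each secondary CPU up through this
 * path; at run time, writing "1" to /sys/devices/system/cpu/cpuN/online
 * does the same via the sysfs store hook in drivers/base/cpu.c.
 */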
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;

int disable_nonboot_cpus(void)
{
	int cpu, first_cpu, error = 0;	/* the loop below may run zero times */

	cpu_maps_update_begin();
	first_cpu = cpumask_first(cpu_online_mask);
	/*
	 * We take down all of the non-boot CPUs in one shot to avoid races
	 * with userspace trying to use CPU hotplug at the same time.
	 */
	cpumask_clear(frozen_cpus);

	printk("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
		error = _cpu_down(cpu, 1);
		if (!error)
			cpumask_set_cpu(cpu, frozen_cpus);
		else {
			printk(KERN_ERR "Error taking CPU%d down: %d\n",
				cpu, error);
			break;
		}
	}

	if (!error) {
		BUG_ON(num_online_cpus() > 1);
		/* Make sure the CPUs won't be enabled by someone else */
		cpu_hotplug_disabled = 1;
	} else {
		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
	}
	cpu_maps_update_done();
	return error;
}
void __weak arch_enable_nonboot_cpus_begin(void)
{
}

void __weak arch_enable_nonboot_cpus_end(void)
{
}

void __ref enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	cpu_hotplug_disabled = 0;
	if (cpumask_empty(frozen_cpus))
		goto out;

	printk("Enabling non-boot CPUs ...\n");

	arch_enable_nonboot_cpus_begin();

	for_each_cpu(cpu, frozen_cpus) {
		error = _cpu_up(cpu, 1);
		if (!error) {
			printk("CPU%d is up\n", cpu);
			continue;
		}
		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
	}

	arch_enable_nonboot_cpus_end();

	cpumask_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}
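/*
 * These two functions are the suspend/resume entry points: the PM core
 * (see kernel/power/) calls disable_nonboot_cpus() before entering
 * suspend or creating a hibernation image, and enable_nonboot_cpus() on
 * the way back.  The tasks_frozen flag they pass to _cpu_down()/_cpu_up()
 * is what turns the notifier events into their *_FROZEN variants.
 */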
static int alloc_frozen_cpus(void)
{
	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
		return -ENOMEM;
	return 0;
}
core_initcall(alloc_frozen_cpus);
#endif /* CONFIG_PM_SLEEP_SMP */
/**
 * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
 * @cpu: cpu that just started
 *
 * This function calls the cpu_chain notifiers with CPU_STARTING.
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
void __cpuinit notify_cpu_starting(unsigned int cpu)
{
	unsigned long val = CPU_STARTING;

#ifdef CONFIG_PM_SLEEP_SMP
	if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
		val = CPU_STARTING_FROZEN;
#endif /* CONFIG_PM_SLEEP_SMP */
	raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
}
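/*
 * Sketch of the expected call order on the new CPU's boot path (the name
 * of the arch entry point varies by architecture; arch_secondary_start()
 * below is hypothetical, and interrupts are still disabled throughout):
 *
 *	arch_secondary_start()
 *		...
 *		notify_cpu_starting(cpu);
 *		set_cpu_online(cpu, true);	// lets the boot cpu's
 *		local_irq_enable();		// __cpu_up() proceed
 */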
#endif /* CONFIG_SMP */
/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure: for each
 * bit position nr below NR_CPUS, it encodes the NR_CPUS-bit value 1<<nr.
 *
 * It is used by cpumask_of() to get a constant address to a CPU
 * mask value that has a single bit set only.
 */

/* cpu_bit_bitmap[0] is empty - so we can back into it */
#define MASK_DECLARE_1(x)	[x+1][0] = 1UL << (x)
#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)

const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
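/*
 * How cpumask_of() indexes this table, paraphrased from the era's
 * include/linux/cpumask.h (treat as a sketch of the companion helper):
 *
 *	static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
 *	{
 *		const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
 *		p -= cpu / BITS_PER_LONG;
 *		return to_cpumask(p);
 *	}
 *
 * Rows overlap by design: backing the pointer up by cpu / BITS_PER_LONG
 * longs lands bit `cpu` in the right word of the returned mask, which is
 * why row 0 must stay all-zero ("so we can back into it").
 */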
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
#ifdef CONFIG_INIT_ALL_POSSIBLE
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
	= CPU_BITS_ALL;
#else
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
#endif
const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
EXPORT_SYMBOL(cpu_possible_mask);

static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
EXPORT_SYMBOL(cpu_online_mask);

static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
EXPORT_SYMBOL(cpu_present_mask);

static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
EXPORT_SYMBOL(cpu_active_mask);
void set_cpu_possible(unsigned int cpu, bool possible)
{
	if (possible)
		cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
	else
		cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
}

void set_cpu_present(unsigned int cpu, bool present)
{
	if (present)
		cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
	else
		cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
}

void set_cpu_online(unsigned int cpu, bool online)
{
	if (online)
		cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
	else
		cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
}

void set_cpu_active(unsigned int cpu, bool active)
{
	if (active)
		cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
	else
		cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
}

void init_cpu_present(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_present_bits), src);
}

void init_cpu_possible(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_possible_bits), src);
}

void init_cpu_online(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_online_bits), src);
}