cpu.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  1. /* CPU control.
  2. * (C) 2001, 2002, 2003, 2004 Rusty Russell
  3. *
  4. * This code is licenced under the GPL.
  5. */
  6. #include <linux/proc_fs.h>
  7. #include <linux/smp.h>
  8. #include <linux/init.h>
  9. #include <linux/notifier.h>
  10. #include <linux/sched.h>
  11. #include <linux/unistd.h>
  12. #include <linux/cpu.h>
  13. #include <linux/module.h>
  14. #include <linux/kthread.h>
  15. #include <linux/stop_machine.h>
  16. #include <linux/mutex.h>
  17. #ifdef CONFIG_SMP
  18. /* Serializes the updates to cpu_online_mask, cpu_present_mask */
  19. static DEFINE_MUTEX(cpu_add_remove_lock);
  20. static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
  21. /* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
  22. * Should always be manipulated under cpu_add_remove_lock
  23. */
  24. static int cpu_hotplug_disabled;
  25. static struct {
  26. struct task_struct *active_writer;
  27. struct mutex lock; /* Synchronizes accesses to refcount, */
  28. /*
  29. * Also blocks the new readers during
  30. * an ongoing cpu hotplug operation.
  31. */
  32. int refcount;
  33. } cpu_hotplug = {
  34. .active_writer = NULL,
  35. .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
  36. .refcount = 0,
  37. };
  38. #ifdef CONFIG_HOTPLUG_CPU
  39. void get_online_cpus(void)
  40. {
  41. might_sleep();
  42. if (cpu_hotplug.active_writer == current)
  43. return;
  44. mutex_lock(&cpu_hotplug.lock);
  45. cpu_hotplug.refcount++;
  46. mutex_unlock(&cpu_hotplug.lock);
  47. }
  48. EXPORT_SYMBOL_GPL(get_online_cpus);
  49. void put_online_cpus(void)
  50. {
  51. if (cpu_hotplug.active_writer == current)
  52. return;
  53. mutex_lock(&cpu_hotplug.lock);
  54. if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
  55. wake_up_process(cpu_hotplug.active_writer);
  56. mutex_unlock(&cpu_hotplug.lock);
  57. }
  58. EXPORT_SYMBOL_GPL(put_online_cpus);
  59. #endif /* CONFIG_HOTPLUG_CPU */
  60. /*
  61. * The following two API's must be used when attempting
  62. * to serialize the updates to cpu_online_mask, cpu_present_mask.
  63. */
  64. void cpu_maps_update_begin(void)
  65. {
  66. mutex_lock(&cpu_add_remove_lock);
  67. }
  68. void cpu_maps_update_done(void)
  69. {
  70. mutex_unlock(&cpu_add_remove_lock);
  71. }
  72. /*
  73. * This ensures that the hotplug operation can begin only when the
  74. * refcount goes to zero.
  75. *
  76. * Note that during a cpu-hotplug operation, the new readers, if any,
  77. * will be blocked by the cpu_hotplug.lock
  78. *
  79. * Since cpu_hotplug_begin() is always called after invoking
  80. * cpu_maps_update_begin(), we can be sure that only one writer is active.
  81. *
  82. * Note that theoretically, there is a possibility of a livelock:
  83. * - Refcount goes to zero, last reader wakes up the sleeping
  84. * writer.
  85. * - Last reader unlocks the cpu_hotplug.lock.
  86. * - A new reader arrives at this moment, bumps up the refcount.
  87. * - The writer acquires the cpu_hotplug.lock finds the refcount
  88. * non zero and goes to sleep again.
  89. *
  90. * However, this is very difficult to achieve in practice since
  91. * get_online_cpus() not an api which is called all that often.
  92. *
  93. */
  94. static void cpu_hotplug_begin(void)
  95. {
  96. cpu_hotplug.active_writer = current;
  97. for (;;) {
  98. mutex_lock(&cpu_hotplug.lock);
  99. if (likely(!cpu_hotplug.refcount))
  100. break;
  101. __set_current_state(TASK_UNINTERRUPTIBLE);
  102. mutex_unlock(&cpu_hotplug.lock);
  103. schedule();
  104. }
  105. }
  106. static void cpu_hotplug_done(void)
  107. {
  108. cpu_hotplug.active_writer = NULL;
  109. mutex_unlock(&cpu_hotplug.lock);
  110. }
  111. /* Need to know about CPUs going up/down? */
  112. int __ref register_cpu_notifier(struct notifier_block *nb)
  113. {
  114. int ret;
  115. cpu_maps_update_begin();
  116. ret = raw_notifier_chain_register(&cpu_chain, nb);
  117. cpu_maps_update_done();
  118. return ret;
  119. }
  120. #ifdef CONFIG_HOTPLUG_CPU
  121. EXPORT_SYMBOL(register_cpu_notifier);
  122. void __ref unregister_cpu_notifier(struct notifier_block *nb)
  123. {
  124. cpu_maps_update_begin();
  125. raw_notifier_chain_unregister(&cpu_chain, nb);
  126. cpu_maps_update_done();
  127. }
  128. EXPORT_SYMBOL(unregister_cpu_notifier);
  129. static inline void check_for_tasks(int cpu)
  130. {
  131. struct task_struct *p;
  132. write_lock_irq(&tasklist_lock);
  133. for_each_process(p) {
  134. if (task_cpu(p) == cpu &&
  135. (!cputime_eq(p->utime, cputime_zero) ||
  136. !cputime_eq(p->stime, cputime_zero)))
  137. printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
  138. (state = %ld, flags = %x) \n",
  139. p->comm, task_pid_nr(p), cpu,
  140. p->state, p->flags);
  141. }
  142. write_unlock_irq(&tasklist_lock);
  143. }
  144. struct take_cpu_down_param {
  145. unsigned long mod;
  146. void *hcpu;
  147. };
  148. /* Take this CPU down. */
  149. static int __ref take_cpu_down(void *_param)
  150. {
  151. struct take_cpu_down_param *param = _param;
  152. int err;
  153. /* Ensure this CPU doesn't handle any more interrupts. */
  154. err = __cpu_disable();
  155. if (err < 0)
  156. return err;
  157. raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
  158. param->hcpu);
  159. /* Force idle task to run as soon as we yield: it should
  160. immediately notice cpu is offline and die quickly. */
  161. sched_idle_next();
  162. return 0;
  163. }
  164. /* Requires cpu_add_remove_lock to be held */
  165. static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
  166. {
  167. int err, nr_calls = 0;
  168. cpumask_var_t old_allowed;
  169. void *hcpu = (void *)(long)cpu;
  170. unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
  171. struct take_cpu_down_param tcd_param = {
  172. .mod = mod,
  173. .hcpu = hcpu,
  174. };
  175. if (num_online_cpus() == 1)
  176. return -EBUSY;
  177. if (!cpu_online(cpu))
  178. return -EINVAL;
  179. if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
  180. return -ENOMEM;
  181. cpu_hotplug_begin();
  182. err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
  183. hcpu, -1, &nr_calls);
  184. if (err == NOTIFY_BAD) {
  185. set_cpu_active(cpu, true);
  186. nr_calls--;
  187. __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
  188. hcpu, nr_calls, NULL);
  189. printk("%s: attempt to take down CPU %u failed\n",
  190. __func__, cpu);
  191. err = -EINVAL;
  192. goto out_release;
  193. }
  194. /* Ensure that we are not runnable on dying cpu */
  195. cpumask_copy(old_allowed, &current->cpus_allowed);
  196. set_cpus_allowed_ptr(current, cpu_active_mask);
  197. err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
  198. if (err) {
  199. set_cpu_active(cpu, true);
  200. /* CPU didn't die: tell everyone. Can't complain. */
  201. if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
  202. hcpu) == NOTIFY_BAD)
  203. BUG();
  204. goto out_allowed;
  205. }
  206. BUG_ON(cpu_online(cpu));
  207. /* Wait for it to sleep (leaving idle task). */
  208. while (!idle_cpu(cpu))
  209. yield();
  210. /* This actually kills the CPU. */
  211. __cpu_die(cpu);
  212. /* CPU is completely dead: tell everyone. Too late to complain. */
  213. if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
  214. hcpu) == NOTIFY_BAD)
  215. BUG();
  216. check_for_tasks(cpu);
  217. out_allowed:
  218. set_cpus_allowed_ptr(current, old_allowed);
  219. out_release:
  220. cpu_hotplug_done();
  221. if (!err) {
  222. if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
  223. hcpu) == NOTIFY_BAD)
  224. BUG();
  225. }
  226. free_cpumask_var(old_allowed);
  227. return err;
  228. }
  229. int __ref cpu_down(unsigned int cpu)
  230. {
  231. int err;
  232. err = stop_machine_create();
  233. if (err)
  234. return err;
  235. cpu_maps_update_begin();
  236. if (cpu_hotplug_disabled) {
  237. err = -EBUSY;
  238. goto out;
  239. }
  240. set_cpu_active(cpu, false);
  241. /*
  242. * Make sure the all cpus did the reschedule and are not
  243. * using stale version of the cpu_active_mask.
  244. * This is not strictly necessary becuase stop_machine()
  245. * that we run down the line already provides the required
  246. * synchronization. But it's really a side effect and we do not
  247. * want to depend on the innards of the stop_machine here.
  248. */
  249. synchronize_sched();
  250. err = _cpu_down(cpu, 0);
  251. out:
  252. cpu_maps_update_done();
  253. stop_machine_destroy();
  254. return err;
  255. }
  256. EXPORT_SYMBOL(cpu_down);
  257. #endif /*CONFIG_HOTPLUG_CPU*/
  258. /* Requires cpu_add_remove_lock to be held */
  259. static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
  260. {
  261. int ret, nr_calls = 0;
  262. void *hcpu = (void *)(long)cpu;
  263. unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
  264. if (cpu_online(cpu) || !cpu_present(cpu))
  265. return -EINVAL;
  266. cpu_hotplug_begin();
  267. ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
  268. -1, &nr_calls);
  269. if (ret == NOTIFY_BAD) {
  270. nr_calls--;
  271. printk("%s: attempt to bring up CPU %u failed\n",
  272. __func__, cpu);
  273. ret = -EINVAL;
  274. goto out_notify;
  275. }
  276. /* Arch-specific enabling code. */
  277. ret = __cpu_up(cpu);
  278. if (ret != 0)
  279. goto out_notify;
  280. BUG_ON(!cpu_online(cpu));
  281. set_cpu_active(cpu, true);
  282. /* Now call notifier in preparation. */
  283. raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
  284. out_notify:
  285. if (ret != 0)
  286. __raw_notifier_call_chain(&cpu_chain,
  287. CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
  288. cpu_hotplug_done();
  289. return ret;
  290. }
  291. int __cpuinit cpu_up(unsigned int cpu)
  292. {
  293. int err = 0;
  294. if (!cpu_possible(cpu)) {
  295. printk(KERN_ERR "can't online cpu %d because it is not "
  296. "configured as may-hotadd at boot time\n", cpu);
  297. #if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
  298. printk(KERN_ERR "please check additional_cpus= boot "
  299. "parameter\n");
  300. #endif
  301. return -EINVAL;
  302. }
  303. cpu_maps_update_begin();
  304. if (cpu_hotplug_disabled) {
  305. err = -EBUSY;
  306. goto out;
  307. }
  308. err = _cpu_up(cpu, 0);
  309. out:
  310. cpu_maps_update_done();
  311. return err;
  312. }
  313. #ifdef CONFIG_PM_SLEEP_SMP
  314. static cpumask_var_t frozen_cpus;
  315. int disable_nonboot_cpus(void)
  316. {
  317. int cpu, first_cpu, error;
  318. error = stop_machine_create();
  319. if (error)
  320. return error;
  321. cpu_maps_update_begin();
  322. first_cpu = cpumask_first(cpu_online_mask);
  323. /* We take down all of the non-boot CPUs in one shot to avoid races
  324. * with the userspace trying to use the CPU hotplug at the same time
  325. */
  326. cpumask_clear(frozen_cpus);
  327. for_each_online_cpu(cpu) {
  328. if (cpu == first_cpu)
  329. continue;
  330. set_cpu_active(cpu, false);
  331. }
  332. synchronize_sched();
  333. printk("Disabling non-boot CPUs ...\n");
  334. for_each_online_cpu(cpu) {
  335. if (cpu == first_cpu)
  336. continue;
  337. error = _cpu_down(cpu, 1);
  338. if (!error)
  339. cpumask_set_cpu(cpu, frozen_cpus);
  340. else {
  341. printk(KERN_ERR "Error taking CPU%d down: %d\n",
  342. cpu, error);
  343. break;
  344. }
  345. }
  346. if (!error) {
  347. BUG_ON(num_online_cpus() > 1);
  348. /* Make sure the CPUs won't be enabled by someone else */
  349. cpu_hotplug_disabled = 1;
  350. } else {
  351. printk(KERN_ERR "Non-boot CPUs are not disabled\n");
  352. }
  353. cpu_maps_update_done();
  354. stop_machine_destroy();
  355. return error;
  356. }
  357. void __weak arch_enable_nonboot_cpus_begin(void)
  358. {
  359. }
  360. void __weak arch_enable_nonboot_cpus_end(void)
  361. {
  362. }
  363. void __ref enable_nonboot_cpus(void)
  364. {
  365. int cpu, error;
  366. /* Allow everyone to use the CPU hotplug again */
  367. cpu_maps_update_begin();
  368. cpu_hotplug_disabled = 0;
  369. if (cpumask_empty(frozen_cpus))
  370. goto out;
  371. printk("Enabling non-boot CPUs ...\n");
  372. arch_enable_nonboot_cpus_begin();
  373. for_each_cpu(cpu, frozen_cpus) {
  374. error = _cpu_up(cpu, 1);
  375. if (!error) {
  376. printk("CPU%d is up\n", cpu);
  377. continue;
  378. }
  379. printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
  380. }
  381. arch_enable_nonboot_cpus_end();
  382. cpumask_clear(frozen_cpus);
  383. out:
  384. cpu_maps_update_done();
  385. }
  386. static int alloc_frozen_cpus(void)
  387. {
  388. if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
  389. return -ENOMEM;
  390. return 0;
  391. }
  392. core_initcall(alloc_frozen_cpus);
  393. #endif /* CONFIG_PM_SLEEP_SMP */
  394. /**
  395. * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
  396. * @cpu: cpu that just started
  397. *
  398. * This function calls the cpu_chain notifiers with CPU_STARTING.
  399. * It must be called by the arch code on the new cpu, before the new cpu
  400. * enables interrupts and before the "boot" cpu returns from __cpu_up().
  401. */
  402. void __cpuinit notify_cpu_starting(unsigned int cpu)
  403. {
  404. unsigned long val = CPU_STARTING;
  405. #ifdef CONFIG_PM_SLEEP_SMP
  406. if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
  407. val = CPU_STARTING_FROZEN;
  408. #endif /* CONFIG_PM_SLEEP_SMP */
  409. raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
  410. }
  411. #endif /* CONFIG_SMP */
  412. /*
  413. * cpu_bit_bitmap[] is a special, "compressed" data structure that
  414. * represents all NR_CPUS bits binary values of 1<<nr.
  415. *
  416. * It is used by cpumask_of() to get a constant address to a CPU
  417. * mask value that has a single bit set only.
  418. */
  419. /* cpu_bit_bitmap[0] is empty - so we can back into it */
  420. #define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x)
  421. #define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
  422. #define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
  423. #define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
  424. const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
  425. MASK_DECLARE_8(0), MASK_DECLARE_8(8),
  426. MASK_DECLARE_8(16), MASK_DECLARE_8(24),
  427. #if BITS_PER_LONG > 32
  428. MASK_DECLARE_8(32), MASK_DECLARE_8(40),
  429. MASK_DECLARE_8(48), MASK_DECLARE_8(56),
  430. #endif
  431. };
  432. EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
  433. const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
  434. EXPORT_SYMBOL(cpu_all_bits);
  435. #ifdef CONFIG_INIT_ALL_POSSIBLE
  436. static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
  437. = CPU_BITS_ALL;
  438. #else
  439. static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
  440. #endif
  441. const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
  442. EXPORT_SYMBOL(cpu_possible_mask);
  443. static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
  444. const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
  445. EXPORT_SYMBOL(cpu_online_mask);
  446. static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
  447. const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
  448. EXPORT_SYMBOL(cpu_present_mask);
  449. static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
  450. const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
  451. EXPORT_SYMBOL(cpu_active_mask);
  452. void set_cpu_possible(unsigned int cpu, bool possible)
  453. {
  454. if (possible)
  455. cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
  456. else
  457. cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
  458. }
  459. void set_cpu_present(unsigned int cpu, bool present)
  460. {
  461. if (present)
  462. cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
  463. else
  464. cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
  465. }
  466. void set_cpu_online(unsigned int cpu, bool online)
  467. {
  468. if (online)
  469. cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
  470. else
  471. cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
  472. }
  473. void set_cpu_active(unsigned int cpu, bool active)
  474. {
  475. if (active)
  476. cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
  477. else
  478. cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
  479. }
  480. void init_cpu_present(const struct cpumask *src)
  481. {
  482. cpumask_copy(to_cpumask(cpu_present_bits), src);
  483. }
  484. void init_cpu_possible(const struct cpumask *src)
  485. {
  486. cpumask_copy(to_cpumask(cpu_possible_bits), src);
  487. }
  488. void init_cpu_online(const struct cpumask *src)
  489. {
  490. cpumask_copy(to_cpumask(cpu_online_bits), src);
  491. }