rcupreempt.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953
  1. /*
  2. * Read-Copy Update mechanism for mutual exclusion, realtime implementation
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17. *
  18. * Copyright IBM Corporation, 2006
  19. *
  20. * Authors: Paul E. McKenney <paulmck@us.ibm.com>
  21. * With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar
  22. * for pushing me away from locks and towards counters, and
  23. * to Suparna Bhattacharya for pushing me completely away
  24. * from atomic instructions on the read side.
  25. *
  26. * Papers: http://www.rdrop.com/users/paulmck/RCU
  27. *
  28. * Design Document: http://lwn.net/Articles/253651/
  29. *
  30. * For detailed explanation of Read-Copy Update mechanism see -
  31. * Documentation/RCU/ *.txt
  32. *
  33. */
  34. #include <linux/types.h>
  35. #include <linux/kernel.h>
  36. #include <linux/init.h>
  37. #include <linux/spinlock.h>
  38. #include <linux/smp.h>
  39. #include <linux/rcupdate.h>
  40. #include <linux/interrupt.h>
  41. #include <linux/sched.h>
  42. #include <asm/atomic.h>
  43. #include <linux/bitops.h>
  44. #include <linux/module.h>
  45. #include <linux/completion.h>
  46. #include <linux/moduleparam.h>
  47. #include <linux/percpu.h>
  48. #include <linux/notifier.h>
  49. #include <linux/rcupdate.h>
  50. #include <linux/cpu.h>
  51. #include <linux/random.h>
  52. #include <linux/delay.h>
  53. #include <linux/byteorder/swabb.h>
  54. #include <linux/cpumask.h>
  55. #include <linux/rcupreempt_trace.h>
  /*
   * ACCESS_ONCE(x): access x through a volatile-qualified lvalue of the
   * same type.  This keeps the compiler from reordering the access, but
   * does absolutely -nothing- to keep the CPU from reordering it.  It is
   * therefore used only to mediate communication between mainline code
   * and hardware-interrupt and NMI handlers.
   */
  #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
  /*
   * PREEMPT_RCU data structures.
   */

  /*
   * GP_STAGES is the number of times the state machine has to go through
   * all of the rcu_try_flip_states (see below) in a single grace period.
   * It also sizes the per-CPU waitlist[] / waittail[] arrays.
   *
   * The "GP" in GP_STAGES stands for Grace Period.
   */
  #define GP_STAGES 2
  74. struct rcu_data {
  75. spinlock_t lock; /* Protect rcu_data fields. */
  76. long completed; /* Number of last completed batch. */
  77. int waitlistcount;
  78. struct tasklet_struct rcu_tasklet;
  79. struct rcu_head *nextlist;
  80. struct rcu_head **nexttail;
  81. struct rcu_head *waitlist[GP_STAGES];
  82. struct rcu_head **waittail[GP_STAGES];
  83. struct rcu_head *donelist;
  84. struct rcu_head **donetail;
  85. long rcu_flipctr[2];
  86. #ifdef CONFIG_RCU_TRACE
  87. struct rcupreempt_trace trace;
  88. #endif /* #ifdef CONFIG_RCU_TRACE */
  89. };
  /*
   * States of the grace-period state machine driven by rcu_try_flip()
   * and friends.  The one-letter tags are the shorthand used for each
   * state.
   */
  enum rcu_try_flip_states {

      /*
       * "I": stay here if nothing is happening.  Flip the counter
       * if something starts happening.
       */
      rcu_try_flip_idle_state,

      /*
       * "A": wait here for all CPUs to notice that the counter has
       * flipped.  This prevents the old set of counters from ever
       * being incremented once we leave this state, which in turn
       * is necessary because we cannot test any individual counter
       * for zero -- we can only check the sum.
       */
      rcu_try_flip_waitack_state,

      /*
       * "Z": wait here for the sum of the old per-CPU counters to
       * reach zero.
       */
      rcu_try_flip_waitzero_state,

      /*
       * "M": wait here for each of the other CPUs to execute a
       * memory barrier.  This is necessary to ensure that these
       * other CPUs really have completed executing their RCU
       * read-side critical sections, despite their CPUs wildly
       * reordering memory.
       */
      rcu_try_flip_waitmb_state,
  };
  120. struct rcu_ctrlblk {
  121. spinlock_t fliplock; /* Protect state-machine transitions. */
  122. long completed; /* Number of last completed batch. */
  123. enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
  124. the rcu state machine */
  125. };
  126. static DEFINE_PER_CPU(struct rcu_data, rcu_data);
  127. static struct rcu_ctrlblk rcu_ctrlblk = {
  128. .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
  129. .completed = 0,
  130. .rcu_try_flip_state = rcu_try_flip_idle_state,
  131. };
  132. #ifdef CONFIG_RCU_TRACE
  /*
   * Printable names for the state machine's states, listed in the same
   * order as enum rcu_try_flip_states so the enum value can be used as
   * the index.
   */
  static char *rcu_try_flip_state_names[] = {
      "idle",
      "waitack",
      "waitzero",
      "waitmb"
  };
  135. #endif /* #ifdef CONFIG_RCU_TRACE */
  136. static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;
  /*
   * Values of the per-CPU flag that records whether each CPU has seen
   * the most recent counter flip.
   */
  enum rcu_flip_flag_values {
      /*
       * Steady/initial state, last flip seen.  Only the GP detector
       * moves a flag out of this state.
       */
      rcu_flip_seen,
      /*
       * Flip just completed, need confirmation.  Only the
       * corresponding CPU moves a flag out of this state (the
       * offline path does so on behalf of a dead CPU).
       */
      rcu_flipped
  };
  147. static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag)
  148. = rcu_flip_seen;
  /*
   * Values of the per-CPU flag that records whether each CPU has executed
   * the memory barrier needed to fence in the memory references of its
   * last RCU read-side critical section in the just-completed grace
   * period.
   */
  enum rcu_mb_flag_values {
      /*
       * Steady/initial state, no mb()s required.  Only the GP
       * detector moves a flag out of this state.
       */
      rcu_mb_done,
      /*
       * Flip just completed, need an mb().  Only the corresponding
       * CPU moves a flag out of this state (the offline path does so
       * on behalf of a dead CPU).
       */
      rcu_mb_needed
  };
  160. static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
  161. = rcu_mb_done;
  /*
   * Accessors for the per-CPU rcu_data structures:
   *
   * RCU_DATA_ME()     - pointer to the rcu_data of the CPU we run on.
   * RCU_DATA_CPU(cpu) - pointer to the rcu_data of the given CPU.
   */
  #define RCU_DATA_ME() (&__get_cpu_var(rcu_data))
  #define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu))
  /*
   * Tracing helpers.  Each one hands trace function f and the address of
   * the relevant rcu_data's ->trace member to RCU_TRACE().
   *
   * None of these expands to a trailing semicolon: the caller supplies
   * it, so that a use such as "RCU_TRACE_ME(f);" is exactly one statement
   * and stays valid as the unbraced body of an if/else.
   */

  /*
   * For use when the appropriate rcu_data is not cached in a local
   * variable, but the CPU number is.
   */
  #define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace))

  /*
   * For use when the appropriate rcu_data is not cached in a local
   * variable at all: trace against the current CPU's rcu_data.
   */
  #define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace))

  /*
   * For use when the appropriate rcu_data is pointed to by a local
   * variable.
   */
  #define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace))
  183. /*
  184. * Return the number of RCU batches processed thus far. Useful
  185. * for debug and statistics.
  186. */
  187. long rcu_batches_completed(void)
  188. {
  189. return rcu_ctrlblk.completed;
  190. }
  191. EXPORT_SYMBOL_GPL(rcu_batches_completed);
  192. EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
  193. void __rcu_read_lock(void)
  194. {
  195. int idx;
  196. struct task_struct *t = current;
  197. int nesting;
  198. nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
  199. if (nesting != 0) {
  200. /* An earlier rcu_read_lock() covers us, just count it. */
  201. t->rcu_read_lock_nesting = nesting + 1;
  202. } else {
  203. unsigned long flags;
  204. /*
  205. * We disable interrupts for the following reasons:
  206. * - If we get scheduling clock interrupt here, and we
  207. * end up acking the counter flip, it's like a promise
  208. * that we will never increment the old counter again.
  209. * Thus we will break that promise if that
  210. * scheduling clock interrupt happens between the time
  211. * we pick the .completed field and the time that we
  212. * increment our counter.
  213. *
  214. * - We don't want to be preempted out here.
  215. *
  216. * NMIs can still occur, of course, and might themselves
  217. * contain rcu_read_lock().
  218. */
  219. local_irq_save(flags);
  220. /*
  221. * Outermost nesting of rcu_read_lock(), so increment
  222. * the current counter for the current CPU. Use volatile
  223. * casts to prevent the compiler from reordering.
  224. */
  225. idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1;
  226. ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++;
  227. /*
  228. * Now that the per-CPU counter has been incremented, we
  229. * are protected from races with rcu_read_lock() invoked
  230. * from NMI handlers on this CPU. We can therefore safely
  231. * increment the nesting counter, relieving further NMIs
  232. * of the need to increment the per-CPU counter.
  233. */
  234. ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1;
  235. /*
  236. * Now that we have preventing any NMIs from storing
  237. * to the ->rcu_flipctr_idx, we can safely use it to
  238. * remember which counter to decrement in the matching
  239. * rcu_read_unlock().
  240. */
  241. ACCESS_ONCE(t->rcu_flipctr_idx) = idx;
  242. local_irq_restore(flags);
  243. }
  244. }
  245. EXPORT_SYMBOL_GPL(__rcu_read_lock);
  246. void __rcu_read_unlock(void)
  247. {
  248. int idx;
  249. struct task_struct *t = current;
  250. int nesting;
  251. nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
  252. if (nesting > 1) {
  253. /*
  254. * We are still protected by the enclosing rcu_read_lock(),
  255. * so simply decrement the counter.
  256. */
  257. t->rcu_read_lock_nesting = nesting - 1;
  258. } else {
  259. unsigned long flags;
  260. /*
  261. * Disable local interrupts to prevent the grace-period
  262. * detection state machine from seeing us half-done.
  263. * NMIs can still occur, of course, and might themselves
  264. * contain rcu_read_lock() and rcu_read_unlock().
  265. */
  266. local_irq_save(flags);
  267. /*
  268. * Outermost nesting of rcu_read_unlock(), so we must
  269. * decrement the current counter for the current CPU.
  270. * This must be done carefully, because NMIs can
  271. * occur at any point in this code, and any rcu_read_lock()
  272. * and rcu_read_unlock() pairs in the NMI handlers
  273. * must interact non-destructively with this code.
  274. * Lots of volatile casts, and -very- careful ordering.
  275. *
  276. * Changes to this code, including this one, must be
  277. * inspected, validated, and tested extremely carefully!!!
  278. */
  279. /*
  280. * First, pick up the index.
  281. */
  282. idx = ACCESS_ONCE(t->rcu_flipctr_idx);
  283. /*
  284. * Now that we have fetched the counter index, it is
  285. * safe to decrement the per-task RCU nesting counter.
  286. * After this, any interrupts or NMIs will increment and
  287. * decrement the per-CPU counters.
  288. */
  289. ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1;
  290. /*
  291. * It is now safe to decrement this task's nesting count.
  292. * NMIs that occur after this statement will route their
  293. * rcu_read_lock() calls through this "else" clause, and
  294. * will thus start incrementing the per-CPU counter on
  295. * their own. They will also clobber ->rcu_flipctr_idx,
  296. * but that is OK, since we have already fetched it.
  297. */
  298. ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--;
  299. local_irq_restore(flags);
  300. }
  301. }
  302. EXPORT_SYMBOL_GPL(__rcu_read_unlock);
  303. /*
  304. * If a global counter flip has occurred since the last time that we
  305. * advanced callbacks, advance them. Hardware interrupts must be
  306. * disabled when calling this function.
  307. */
  308. static void __rcu_advance_callbacks(struct rcu_data *rdp)
  309. {
  310. int cpu;
  311. int i;
  312. int wlc = 0;
  313. if (rdp->completed != rcu_ctrlblk.completed) {
  314. if (rdp->waitlist[GP_STAGES - 1] != NULL) {
  315. *rdp->donetail = rdp->waitlist[GP_STAGES - 1];
  316. rdp->donetail = rdp->waittail[GP_STAGES - 1];
  317. RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
  318. }
  319. for (i = GP_STAGES - 2; i >= 0; i--) {
  320. if (rdp->waitlist[i] != NULL) {
  321. rdp->waitlist[i + 1] = rdp->waitlist[i];
  322. rdp->waittail[i + 1] = rdp->waittail[i];
  323. wlc++;
  324. } else {
  325. rdp->waitlist[i + 1] = NULL;
  326. rdp->waittail[i + 1] =
  327. &rdp->waitlist[i + 1];
  328. }
  329. }
  330. if (rdp->nextlist != NULL) {
  331. rdp->waitlist[0] = rdp->nextlist;
  332. rdp->waittail[0] = rdp->nexttail;
  333. wlc++;
  334. rdp->nextlist = NULL;
  335. rdp->nexttail = &rdp->nextlist;
  336. RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
  337. } else {
  338. rdp->waitlist[0] = NULL;
  339. rdp->waittail[0] = &rdp->waitlist[0];
  340. }
  341. rdp->waitlistcount = wlc;
  342. rdp->completed = rcu_ctrlblk.completed;
  343. }
  344. /*
  345. * Check to see if this CPU needs to report that it has seen
  346. * the most recent counter flip, thereby declaring that all
  347. * subsequent rcu_read_lock() invocations will respect this flip.
  348. */
  349. cpu = raw_smp_processor_id();
  350. if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
  351. smp_mb(); /* Subsequent counter accesses must see new value */
  352. per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
  353. smp_mb(); /* Subsequent RCU read-side critical sections */
  354. /* seen -after- acknowledgement. */
  355. }
  356. }
  357. /*
  358. * Get here when RCU is idle. Decide whether we need to
  359. * move out of idle state, and return non-zero if so.
  360. * "Straightforward" approach for the moment, might later
  361. * use callback-list lengths, grace-period duration, or
  362. * some such to determine when to exit idle state.
  363. * Might also need a pre-idle test that does not acquire
  364. * the lock, but let's get the simple case working first...
  365. */
  366. static int
  367. rcu_try_flip_idle(void)
  368. {
  369. int cpu;
  370. RCU_TRACE_ME(rcupreempt_trace_try_flip_i1);
  371. if (!rcu_pending(smp_processor_id())) {
  372. RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1);
  373. return 0;
  374. }
  375. /*
  376. * Do the flip.
  377. */
  378. RCU_TRACE_ME(rcupreempt_trace_try_flip_g1);
  379. rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */
  380. /*
  381. * Need a memory barrier so that other CPUs see the new
  382. * counter value before they see the subsequent change of all
  383. * the rcu_flip_flag instances to rcu_flipped.
  384. */
  385. smp_mb(); /* see above block comment. */
  386. /* Now ask each CPU for acknowledgement of the flip. */
  387. for_each_cpu_mask(cpu, rcu_cpu_online_map)
  388. per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
  389. return 1;
  390. }
  391. /*
  392. * Wait for CPUs to acknowledge the flip.
  393. */
  394. static int
  395. rcu_try_flip_waitack(void)
  396. {
  397. int cpu;
  398. RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
  399. for_each_cpu_mask(cpu, rcu_cpu_online_map)
  400. if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
  401. RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
  402. return 0;
  403. }
  404. /*
  405. * Make sure our checks above don't bleed into subsequent
  406. * waiting for the sum of the counters to reach zero.
  407. */
  408. smp_mb(); /* see above block comment. */
  409. RCU_TRACE_ME(rcupreempt_trace_try_flip_a2);
  410. return 1;
  411. }
  412. /*
  413. * Wait for collective ``last'' counter to reach zero,
  414. * then tell all CPUs to do an end-of-grace-period memory barrier.
  415. */
  416. static int
  417. rcu_try_flip_waitzero(void)
  418. {
  419. int cpu;
  420. int lastidx = !(rcu_ctrlblk.completed & 0x1);
  421. int sum = 0;
  422. /* Check to see if the sum of the "last" counters is zero. */
  423. RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
  424. for_each_cpu_mask(cpu, rcu_cpu_online_map)
  425. sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
  426. if (sum != 0) {
  427. RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
  428. return 0;
  429. }
  430. /*
  431. * This ensures that the other CPUs see the call for
  432. * memory barriers -after- the sum to zero has been
  433. * detected here
  434. */
  435. smp_mb(); /* ^^^^^^^^^^^^ */
  436. /* Call for a memory barrier from each CPU. */
  437. for_each_cpu_mask(cpu, rcu_cpu_online_map)
  438. per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
  439. RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
  440. return 1;
  441. }
  442. /*
  443. * Wait for all CPUs to do their end-of-grace-period memory barrier.
  444. * Return 0 once all CPUs have done so.
  445. */
  446. static int
  447. rcu_try_flip_waitmb(void)
  448. {
  449. int cpu;
  450. RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
  451. for_each_cpu_mask(cpu, rcu_cpu_online_map)
  452. if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
  453. RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
  454. return 0;
  455. }
  456. smp_mb(); /* Ensure that the above checks precede any following flip. */
  457. RCU_TRACE_ME(rcupreempt_trace_try_flip_m2);
  458. return 1;
  459. }
  460. /*
  461. * Attempt a single flip of the counters. Remember, a single flip does
  462. * -not- constitute a grace period. Instead, the interval between
  463. * at least GP_STAGES consecutive flips is a grace period.
  464. *
  465. * If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation
  466. * on a large SMP, they might want to use a hierarchical organization of
  467. * the per-CPU-counter pairs.
  468. */
  469. static void rcu_try_flip(void)
  470. {
  471. unsigned long flags;
  472. RCU_TRACE_ME(rcupreempt_trace_try_flip_1);
  473. if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) {
  474. RCU_TRACE_ME(rcupreempt_trace_try_flip_e1);
  475. return;
  476. }
  477. /*
  478. * Take the next transition(s) through the RCU grace-period
  479. * flip-counter state machine.
  480. */
  481. switch (rcu_ctrlblk.rcu_try_flip_state) {
  482. case rcu_try_flip_idle_state:
  483. if (rcu_try_flip_idle())
  484. rcu_ctrlblk.rcu_try_flip_state =
  485. rcu_try_flip_waitack_state;
  486. break;
  487. case rcu_try_flip_waitack_state:
  488. if (rcu_try_flip_waitack())
  489. rcu_ctrlblk.rcu_try_flip_state =
  490. rcu_try_flip_waitzero_state;
  491. break;
  492. case rcu_try_flip_waitzero_state:
  493. if (rcu_try_flip_waitzero())
  494. rcu_ctrlblk.rcu_try_flip_state =
  495. rcu_try_flip_waitmb_state;
  496. break;
  497. case rcu_try_flip_waitmb_state:
  498. if (rcu_try_flip_waitmb())
  499. rcu_ctrlblk.rcu_try_flip_state =
  500. rcu_try_flip_idle_state;
  501. }
  502. spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
  503. }
  504. /*
  505. * Check to see if this CPU needs to do a memory barrier in order to
  506. * ensure that any prior RCU read-side critical sections have committed
  507. * their counter manipulations and critical-section memory references
  508. * before declaring the grace period to be completed.
  509. */
  510. static void rcu_check_mb(int cpu)
  511. {
  512. if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) {
  513. smp_mb(); /* Ensure RCU read-side accesses are visible. */
  514. per_cpu(rcu_mb_flag, cpu) = rcu_mb_done;
  515. }
  516. }
  517. void rcu_check_callbacks(int cpu, int user)
  518. {
  519. unsigned long flags;
  520. struct rcu_data *rdp = RCU_DATA_CPU(cpu);
  521. rcu_check_mb(cpu);
  522. if (rcu_ctrlblk.completed == rdp->completed)
  523. rcu_try_flip();
  524. spin_lock_irqsave(&rdp->lock, flags);
  525. RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
  526. __rcu_advance_callbacks(rdp);
  527. if (rdp->donelist == NULL) {
  528. spin_unlock_irqrestore(&rdp->lock, flags);
  529. } else {
  530. spin_unlock_irqrestore(&rdp->lock, flags);
  531. raise_softirq(RCU_SOFTIRQ);
  532. }
  533. }
  534. /*
  535. * Needed by dynticks, to make sure all RCU processing has finished
  536. * when we go idle:
  537. */
  538. void rcu_advance_callbacks(int cpu, int user)
  539. {
  540. unsigned long flags;
  541. struct rcu_data *rdp = RCU_DATA_CPU(cpu);
  542. if (rcu_ctrlblk.completed == rdp->completed) {
  543. rcu_try_flip();
  544. if (rcu_ctrlblk.completed == rdp->completed)
  545. return;
  546. }
  547. spin_lock_irqsave(&rdp->lock, flags);
  548. RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
  549. __rcu_advance_callbacks(rdp);
  550. spin_unlock_irqrestore(&rdp->lock, flags);
  551. }
  552. #ifdef CONFIG_HOTPLUG_CPU
  /*
   * Splice the singly linked list (srclist, srctail) onto the end of the
   * list whose tail pointer is dsttail, then reset the source to empty.
   * An empty source only rewrites *dsttail with NULL and leaves both tail
   * pointers alone.
   *
   * srclist, srctail and dsttail must be lvalues, and each is evaluated
   * more than once.  dstlist is accepted for symmetry but not used.
   * Every argument is parenthesized so that arbitrary expressions can be
   * passed safely.
   */
  #define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) do { \
      *(dsttail) = (srclist); \
      if ((srclist) != NULL) { \
          (dsttail) = (srctail); \
          (srclist) = NULL; \
          (srctail) = &(srclist); \
      } \
  } while (0)
  561. void rcu_offline_cpu(int cpu)
  562. {
  563. int i;
  564. struct rcu_head *list = NULL;
  565. unsigned long flags;
  566. struct rcu_data *rdp = RCU_DATA_CPU(cpu);
  567. struct rcu_head **tail = &list;
  568. /*
  569. * Remove all callbacks from the newly dead CPU, retaining order.
  570. * Otherwise rcu_barrier() will fail
  571. */
  572. spin_lock_irqsave(&rdp->lock, flags);
  573. rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail);
  574. for (i = GP_STAGES - 1; i >= 0; i--)
  575. rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
  576. list, tail);
  577. rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
  578. spin_unlock_irqrestore(&rdp->lock, flags);
  579. rdp->waitlistcount = 0;
  580. /* Disengage the newly dead CPU from the grace-period computation. */
  581. spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
  582. rcu_check_mb(cpu);
  583. if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
  584. smp_mb(); /* Subsequent counter accesses must see new value */
  585. per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
  586. smp_mb(); /* Subsequent RCU read-side critical sections */
  587. /* seen -after- acknowledgement. */
  588. }
  589. RCU_DATA_ME()->rcu_flipctr[0] += RCU_DATA_CPU(cpu)->rcu_flipctr[0];
  590. RCU_DATA_ME()->rcu_flipctr[1] += RCU_DATA_CPU(cpu)->rcu_flipctr[1];
  591. RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0;
  592. RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0;
  593. cpu_clear(cpu, rcu_cpu_online_map);
  594. spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
  595. /*
  596. * Place the removed callbacks on the current CPU's queue.
  597. * Make them all start a new grace period: simple approach,
  598. * in theory could starve a given set of callbacks, but
  599. * you would need to be doing some serious CPU hotplugging
  600. * to make this happen. If this becomes a problem, adding
  601. * a synchronize_rcu() to the hotplug path would be a simple
  602. * fix.
  603. */
  604. rdp = RCU_DATA_ME();
  605. spin_lock_irqsave(&rdp->lock, flags);
  606. *rdp->nexttail = list;
  607. if (list)
  608. rdp->nexttail = tail;
  609. spin_unlock_irqrestore(&rdp->lock, flags);
  610. }
  611. void __devinit rcu_online_cpu(int cpu)
  612. {
  613. unsigned long flags;
  614. spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
  615. cpu_set(cpu, rcu_cpu_online_map);
  616. spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
  617. }
  618. #else /* #ifdef CONFIG_HOTPLUG_CPU */
  /*
   * Without CONFIG_HOTPLUG_CPU this is a do-nothing stub; it exists so
   * that rcu_cpu_notify() can call it unconditionally.
   */
  void rcu_offline_cpu(int cpu)
  {
      /* Intentionally empty. */
  }
  622. void __devinit rcu_online_cpu(int cpu)
  623. {
  624. }
  625. #endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
  626. static void rcu_process_callbacks(struct softirq_action *unused)
  627. {
  628. unsigned long flags;
  629. struct rcu_head *next, *list;
  630. struct rcu_data *rdp = RCU_DATA_ME();
  631. spin_lock_irqsave(&rdp->lock, flags);
  632. list = rdp->donelist;
  633. if (list == NULL) {
  634. spin_unlock_irqrestore(&rdp->lock, flags);
  635. return;
  636. }
  637. rdp->donelist = NULL;
  638. rdp->donetail = &rdp->donelist;
  639. RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp);
  640. spin_unlock_irqrestore(&rdp->lock, flags);
  641. while (list) {
  642. next = list->next;
  643. list->func(list);
  644. list = next;
  645. RCU_TRACE_ME(rcupreempt_trace_invoke);
  646. }
  647. }
  648. void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
  649. {
  650. unsigned long flags;
  651. struct rcu_data *rdp;
  652. head->func = func;
  653. head->next = NULL;
  654. local_irq_save(flags);
  655. rdp = RCU_DATA_ME();
  656. spin_lock(&rdp->lock);
  657. __rcu_advance_callbacks(rdp);
  658. *rdp->nexttail = head;
  659. rdp->nexttail = &head->next;
  660. RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
  661. spin_unlock(&rdp->lock);
  662. local_irq_restore(flags);
  663. }
  664. EXPORT_SYMBOL_GPL(call_rcu);
  665. /*
  666. * Wait until all currently running preempt_disable() code segments
  667. * (including hardware-irq-disable segments) complete. Note that
  668. * in -rt this does -not- necessarily result in all currently executing
  669. * interrupt -handlers- having completed.
  670. */
  671. void __synchronize_sched(void)
  672. {
  673. cpumask_t oldmask;
  674. int cpu;
  675. if (sched_getaffinity(0, &oldmask) < 0)
  676. oldmask = cpu_possible_map;
  677. for_each_online_cpu(cpu) {
  678. sched_setaffinity(0, cpumask_of_cpu(cpu));
  679. schedule();
  680. }
  681. sched_setaffinity(0, oldmask);
  682. }
  683. EXPORT_SYMBOL_GPL(__synchronize_sched);
  684. /*
  685. * Check to see if any future RCU-related work will need to be done
  686. * by the current CPU, even if none need be done immediately, returning
  687. * 1 if so. Assumes that notifiers would take care of handling any
  688. * outstanding requests from the RCU core.
  689. *
  690. * This function is part of the RCU implementation; it is -not-
  691. * an exported member of the RCU API.
  692. */
  693. int rcu_needs_cpu(int cpu)
  694. {
  695. struct rcu_data *rdp = RCU_DATA_CPU(cpu);
  696. return (rdp->donelist != NULL ||
  697. !!rdp->waitlistcount ||
  698. rdp->nextlist != NULL);
  699. }
  700. int rcu_pending(int cpu)
  701. {
  702. struct rcu_data *rdp = RCU_DATA_CPU(cpu);
  703. /* The CPU has at least one callback queued somewhere. */
  704. if (rdp->donelist != NULL ||
  705. !!rdp->waitlistcount ||
  706. rdp->nextlist != NULL)
  707. return 1;
  708. /* The RCU core needs an acknowledgement from this CPU. */
  709. if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) ||
  710. (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed))
  711. return 1;
  712. /* This CPU has fallen behind the global grace-period number. */
  713. if (rdp->completed != rcu_ctrlblk.completed)
  714. return 1;
  715. /* Nothing needed from this CPU. */
  716. return 0;
  717. }
  718. static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
  719. unsigned long action, void *hcpu)
  720. {
  721. long cpu = (long)hcpu;
  722. switch (action) {
  723. case CPU_UP_PREPARE:
  724. case CPU_UP_PREPARE_FROZEN:
  725. rcu_online_cpu(cpu);
  726. break;
  727. case CPU_UP_CANCELED:
  728. case CPU_UP_CANCELED_FROZEN:
  729. case CPU_DEAD:
  730. case CPU_DEAD_FROZEN:
  731. rcu_offline_cpu(cpu);
  732. break;
  733. default:
  734. break;
  735. }
  736. return NOTIFY_OK;
  737. }
  738. static struct notifier_block __cpuinitdata rcu_nb = {
  739. .notifier_call = rcu_cpu_notify,
  740. };
  741. void __init __rcu_init(void)
  742. {
  743. int cpu;
  744. int i;
  745. struct rcu_data *rdp;
  746. printk(KERN_NOTICE "Preemptible RCU implementation.\n");
  747. for_each_possible_cpu(cpu) {
  748. rdp = RCU_DATA_CPU(cpu);
  749. spin_lock_init(&rdp->lock);
  750. rdp->completed = 0;
  751. rdp->waitlistcount = 0;
  752. rdp->nextlist = NULL;
  753. rdp->nexttail = &rdp->nextlist;
  754. for (i = 0; i < GP_STAGES; i++) {
  755. rdp->waitlist[i] = NULL;
  756. rdp->waittail[i] = &rdp->waitlist[i];
  757. }
  758. rdp->donelist = NULL;
  759. rdp->donetail = &rdp->donelist;
  760. rdp->rcu_flipctr[0] = 0;
  761. rdp->rcu_flipctr[1] = 0;
  762. }
  763. register_cpu_notifier(&rcu_nb);
  764. /*
  765. * We don't need protection against CPU-Hotplug here
  766. * since
  767. * a) If a CPU comes online while we are iterating over the
  768. * cpu_online_map below, we would only end up making a
  769. * duplicate call to rcu_online_cpu() which sets the corresponding
  770. * CPU's mask in the rcu_cpu_online_map.
  771. *
  772. * b) A CPU cannot go offline at this point in time since the user
  773. * does not have access to the sysfs interface, nor do we
  774. * suspend the system.
  775. */
  776. for_each_online_cpu(cpu)
  777. rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long) cpu);
  778. open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
  779. }
  /*
   * Deprecated wrapper that simply calls synchronize_rcu().  New code
   * should use synchronize_rcu() or synchronize_sched() instead.
   */
  void synchronize_kernel(void)
  {
      synchronize_rcu();
  }
  #ifdef CONFIG_RCU_TRACE

  /*
   * Accessors that expose this file's static RCU state, built only when
   * CONFIG_RCU_TRACE is set.
   */

  /* Address of the first of the given CPU's two read-side counters. */
  long *rcupreempt_flipctr(int cpu)
  {
      struct rcu_data *rdp = RCU_DATA_CPU(cpu);

      return &rdp->rcu_flipctr[0];
  }
  EXPORT_SYMBOL_GPL(rcupreempt_flipctr);

  /* Current rcu_flip_flag value of the given CPU. */
  int rcupreempt_flip_flag(int cpu)
  {
      return per_cpu(rcu_flip_flag, cpu);
  }
  EXPORT_SYMBOL_GPL(rcupreempt_flip_flag);

  /* Current rcu_mb_flag value of the given CPU. */
  int rcupreempt_mb_flag(int cpu)
  {
      return per_cpu(rcu_mb_flag, cpu);
  }
  EXPORT_SYMBOL_GPL(rcupreempt_mb_flag);

  /* Printable name of the state machine's current state. */
  char *rcupreempt_try_flip_state_name(void)
  {
      return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state];
  }
  EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name);

  /* Address of the given CPU's trace structure. */
  struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
  {
      return &RCU_DATA_CPU(cpu)->trace;
  }
  EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu);

  #endif /* #ifdef CONFIG_RCU_TRACE */
  811. return &rdp->trace;
  812. }
  813. EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu);
  814. #endif /* #ifdef RCU_TRACE */