rcupreempt.c
/*
 * Read-Copy Update mechanism for mutual exclusion, realtime implementation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright IBM Corporation, 2006
 *
 * Authors: Paul E. McKenney <paulmck@us.ibm.com>
 *		With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar
 *		for pushing me away from locks and towards counters, and
 *		to Suparna Bhattacharya for pushing me completely away
 *		from atomic instructions on the read side.
 *
 *  - Added handling of Dynamic Ticks
 *      Copyright 2007 - Paul E. McKenney <paulmck@us.ibm.com>
 *                     - Steven Rostedt <srostedt@redhat.com>
 *
 * Papers:  http://www.rdrop.com/users/paulmck/RCU
 *
 * Design Document: http://lwn.net/Articles/253651/
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		Documentation/RCU/ *.txt
 *
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/random.h>
#include <linux/delay.h>
#include <linux/cpumask.h>
#include <linux/rcupreempt_trace.h>
#include <asm/byteorder.h>

/*
 * PREEMPT_RCU data structures.
 */

/*
 * GP_STAGES specifies the number of times the state machine has
 * to go through all of the rcu_try_flip_states (see below)
 * in a single Grace Period.
 *
 * GP in GP_STAGES stands for Grace Period ;)
 */
#define GP_STAGES	2
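
/*
 * Callbacks queued on a given CPU flow through the rcu_data lists below,
 * advancing by one list each time __rcu_advance_callbacks() observes a
 * counter flip:
 *
 *	nextlist -> waitlist[0] -> ... -> waitlist[GP_STAGES - 1] -> donelist
 *
 * Each list is paired with a tail pointer that always references the
 * ->next field of the last element (or the list header when the list is
 * empty), so whole lists can be spliced in constant time.
 */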
struct rcu_data {
	spinlock_t	lock;		/* Protect rcu_data fields. */
	long		completed;	/* Number of last completed batch. */
	int		waitlistcount;
	struct rcu_head *nextlist;
	struct rcu_head **nexttail;
	struct rcu_head *waitlist[GP_STAGES];
	struct rcu_head **waittail[GP_STAGES];
	struct rcu_head *donelist;	/* from waitlist & waitschedlist */
	struct rcu_head **donetail;
	long rcu_flipctr[2];
	struct rcu_head *nextschedlist;
	struct rcu_head **nextschedtail;
	struct rcu_head *waitschedlist;
	struct rcu_head **waitschedtail;
	int rcu_sched_sleeping;
#ifdef CONFIG_RCU_TRACE
	struct rcupreempt_trace trace;
#endif /* #ifdef CONFIG_RCU_TRACE */
};

/*
 * States for rcu_try_flip() and friends.
 */

enum rcu_try_flip_states {

	/*
	 * Stay here if nothing is happening.  Flip the counter if something
	 * starts happening.  Denoted by "I"
	 */
	rcu_try_flip_idle_state,

	/*
	 * Wait here for all CPUs to notice that the counter has flipped. This
	 * prevents the old set of counters from ever being incremented once
	 * we leave this state, which in turn is necessary because we cannot
	 * test any individual counter for zero -- we can only check the sum.
	 * Denoted by "A".
	 */
	rcu_try_flip_waitack_state,

	/*
	 * Wait here for the sum of the old per-CPU counters to reach zero.
	 * Denoted by "Z".
	 */
	rcu_try_flip_waitzero_state,

	/*
	 * Wait here for each of the other CPUs to execute a memory barrier.
	 * This is necessary to ensure that these other CPUs really have
	 * completed executing their RCU read-side critical sections, despite
	 * their CPUs wildly reordering memory. Denoted by "M".
	 */
	rcu_try_flip_waitmb_state,
};
/*
 * States for rcu_ctrlblk.rcu_sched_sleep.
 */
enum rcu_sched_sleep_states {
	rcu_sched_not_sleeping,	/* Not sleeping, callbacks need GP.  */
	rcu_sched_sleep_prep,	/* Thinking of sleeping, rechecking. */
	rcu_sched_sleeping,	/* Sleeping, awaken if GP needed. */
};

struct rcu_ctrlblk {
	spinlock_t	fliplock;	/* Protect state-machine transitions. */
	long		completed;	/* Number of last completed batch. */
	enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
							the rcu state machine */
	spinlock_t	schedlock;	/* Protect rcu_sched sleep state. */
	enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
	wait_queue_head_t sched_wq;	/* Place for rcu_sched to sleep. */
};

struct rcu_dyntick_sched {
	int dynticks;
	int dynticks_snap;
	int sched_qs;
	int sched_qs_snap;
	int sched_dynticks_snap;
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
	.dynticks = 1,
};

void rcu_qsctr_inc(int cpu)
{
	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);

	rdssp->sched_qs++;
}

#ifdef CONFIG_NO_HZ

void rcu_enter_nohz(void)
{
	static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);

	smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
	__get_cpu_var(rcu_dyntick_sched).dynticks++;
	WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs);
}

void rcu_exit_nohz(void)
{
	static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);

	__get_cpu_var(rcu_dyntick_sched).dynticks++;
	smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
	WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1),
				&rs);
}

#endif /* CONFIG_NO_HZ */
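
/*
 * Parity convention for rcu_dyntick_sched.dynticks, as used by the
 * CONFIG_NO_HZ code above and below: the counter starts at 1 and is
 * incremented on every transition, so it is odd while the CPU is active
 * (or inside an irq/NMI handler taken from idle) and even while the CPU
 * sits in dynticks-idle.  The grace-period machinery snapshots this
 * counter and later compares it against the current value to decide
 * whether a CPU may be ignored without an explicit acknowledgement.
 */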
static DEFINE_PER_CPU(struct rcu_data, rcu_data);

static struct rcu_ctrlblk rcu_ctrlblk = {
	.fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
	.completed = 0,
	.rcu_try_flip_state = rcu_try_flip_idle_state,
	.schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
	.sched_sleep = rcu_sched_not_sleeping,
	.sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
};

static struct task_struct *rcu_sched_grace_period_task;

#ifdef CONFIG_RCU_TRACE
static char *rcu_try_flip_state_names[] =
	{ "idle", "waitack", "waitzero", "waitmb" };
#endif /* #ifdef CONFIG_RCU_TRACE */

static DECLARE_BITMAP(rcu_cpu_online_map, NR_CPUS) __read_mostly
	= CPU_BITS_NONE;

/*
 * Enum and per-CPU flag to determine when each CPU has seen
 * the most recent counter flip.
 */

enum rcu_flip_flag_values {
	rcu_flip_seen,		/* Steady/initial state, last flip seen. */
				/* Only GP detector can update. */
	rcu_flipped		/* Flip just completed, need confirmation. */
				/* Only corresponding CPU can update. */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag)
								= rcu_flip_seen;

/*
 * Enum and per-CPU flag to determine when each CPU has executed the
 * needed memory barrier to fence in memory references from its last RCU
 * read-side critical section in the just-completed grace period.
 */

enum rcu_mb_flag_values {
	rcu_mb_done,		/* Steady/initial state, no mb()s required. */
				/* Only GP detector can update. */
	rcu_mb_needed		/* Flip just completed, need an mb(). */
				/* Only corresponding CPU can update. */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
								= rcu_mb_done;

/*
 * RCU_DATA_ME: find the current CPU's rcu_data structure.
 * RCU_DATA_CPU: find the specified CPU's rcu_data structure.
 */
#define RCU_DATA_ME()		(&__get_cpu_var(rcu_data))
#define RCU_DATA_CPU(cpu)	(&per_cpu(rcu_data, cpu))

/*
 * Helper macro for tracing when the appropriate rcu_data is not
 * cached in a local variable, but where the CPU number is so cached.
 */
#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace));

/*
 * Helper macro for tracing when the appropriate rcu_data is not
 * cached in a local variable.
 */
#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace));

/*
 * Helper macro for tracing when the appropriate rcu_data is pointed
 * to by a local variable.
 */
#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));

#define RCU_SCHED_BATCH_TIME (HZ / 50)

/*
 * Return the number of RCU batches processed thus far.  Useful
 * for debug and statistics.
 */
long rcu_batches_completed(void)
{
	return rcu_ctrlblk.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);
void __rcu_read_lock(void)
{
	int idx;
	struct task_struct *t = current;
	int nesting;

	nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
	if (nesting != 0) {

		/* An earlier rcu_read_lock() covers us, just count it. */

		t->rcu_read_lock_nesting = nesting + 1;

	} else {
		unsigned long flags;

		/*
		 * We disable interrupts for the following reasons:
		 * - If we get scheduling clock interrupt here, and we
		 *   end up acking the counter flip, it's like a promise
		 *   that we will never increment the old counter again.
		 *   Thus we will break that promise if that
		 *   scheduling clock interrupt happens between the time
		 *   we pick the .completed field and the time that we
		 *   increment our counter.
		 *
		 * - We don't want to be preempted out here.
		 *
		 * NMIs can still occur, of course, and might themselves
		 * contain rcu_read_lock().
		 */
		local_irq_save(flags);

		/*
		 * Outermost nesting of rcu_read_lock(), so increment
		 * the current counter for the current CPU.  Use volatile
		 * casts to prevent the compiler from reordering.
		 */
		idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1;
		ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++;

		/*
		 * Now that the per-CPU counter has been incremented, we
		 * are protected from races with rcu_read_lock() invoked
		 * from NMI handlers on this CPU.  We can therefore safely
		 * increment the nesting counter, relieving further NMIs
		 * of the need to increment the per-CPU counter.
		 */
		ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1;

		/*
		 * Now that we have prevented any NMIs from storing
		 * to the ->rcu_flipctr_idx, we can safely use it to
		 * remember which counter to decrement in the matching
		 * rcu_read_unlock().
		 */
		ACCESS_ONCE(t->rcu_flipctr_idx) = idx;
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
void __rcu_read_unlock(void)
{
	int idx;
	struct task_struct *t = current;
	int nesting;

	nesting = ACCESS_ONCE(t->rcu_read_lock_nesting);
	if (nesting > 1) {

		/*
		 * We are still protected by the enclosing rcu_read_lock(),
		 * so simply decrement the counter.
		 */
		t->rcu_read_lock_nesting = nesting - 1;

	} else {
		unsigned long flags;

		/*
		 * Disable local interrupts to prevent the grace-period
		 * detection state machine from seeing us half-done.
		 * NMIs can still occur, of course, and might themselves
		 * contain rcu_read_lock() and rcu_read_unlock().
		 */
		local_irq_save(flags);

		/*
		 * Outermost nesting of rcu_read_unlock(), so we must
		 * decrement the current counter for the current CPU.
		 * This must be done carefully, because NMIs can
		 * occur at any point in this code, and any rcu_read_lock()
		 * and rcu_read_unlock() pairs in the NMI handlers
		 * must interact non-destructively with this code.
		 * Lots of volatile casts, and -very- careful ordering.
		 *
		 * Changes to this code, including this one, must be
		 * inspected, validated, and tested extremely carefully!!!
		 */

		/*
		 * First, pick up the index.
		 */
		idx = ACCESS_ONCE(t->rcu_flipctr_idx);

		/*
		 * Now that we have fetched the counter index, it is
		 * safe to decrement the per-task RCU nesting counter.
		 * After this, any interrupts or NMIs will increment and
		 * decrement the per-CPU counters.
		 */
		ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1;

		/*
		 * It is now safe to decrement this task's nesting count.
		 * NMIs that occur after this statement will route their
		 * rcu_read_lock() calls through this "else" clause, and
		 * will thus start incrementing the per-CPU counter on
		 * their own.  They will also clobber ->rcu_flipctr_idx,
		 * but that is OK, since we have already fetched it.
		 */
		ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--;
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
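
/*
 * Readers do not call __rcu_read_lock()/__rcu_read_unlock() directly; they
 * go through the rcu_read_lock()/rcu_read_unlock() wrappers declared in
 * linux/rcupdate.h.  A minimal reader-side sketch (illustrative only;
 * "gp", "struct foo", and do_something_with() are made-up names):
 *
 *	struct foo *p;
 *
 *	rcu_read_lock();
 *	p = rcu_dereference(gp);
 *	if (p != NULL)
 *		do_something_with(p);
 *	rcu_read_unlock();
 *
 * Everything between rcu_read_lock() and rcu_read_unlock() is covered by
 * the per-CPU rcu_flipctr pair manipulated above.
 */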
/*
 * If a global counter flip has occurred since the last time that we
 * advanced callbacks, advance them.  Hardware interrupts must be
 * disabled when calling this function.
 */
static void __rcu_advance_callbacks(struct rcu_data *rdp)
{
	int cpu;
	int i;
	int wlc = 0;

	if (rdp->completed != rcu_ctrlblk.completed) {
		if (rdp->waitlist[GP_STAGES - 1] != NULL) {
			*rdp->donetail = rdp->waitlist[GP_STAGES - 1];
			rdp->donetail = rdp->waittail[GP_STAGES - 1];
			RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
		}
		for (i = GP_STAGES - 2; i >= 0; i--) {
			if (rdp->waitlist[i] != NULL) {
				rdp->waitlist[i + 1] = rdp->waitlist[i];
				rdp->waittail[i + 1] = rdp->waittail[i];
				wlc++;
			} else {
				rdp->waitlist[i + 1] = NULL;
				rdp->waittail[i + 1] =
					&rdp->waitlist[i + 1];
			}
		}
		if (rdp->nextlist != NULL) {
			rdp->waitlist[0] = rdp->nextlist;
			rdp->waittail[0] = rdp->nexttail;
			wlc++;
			rdp->nextlist = NULL;
			rdp->nexttail = &rdp->nextlist;
			RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
		} else {
			rdp->waitlist[0] = NULL;
			rdp->waittail[0] = &rdp->waitlist[0];
		}
		rdp->waitlistcount = wlc;
		rdp->completed = rcu_ctrlblk.completed;
	}

	/*
	 * Check to see if this CPU needs to report that it has seen
	 * the most recent counter flip, thereby declaring that all
	 * subsequent rcu_read_lock() invocations will respect this flip.
	 */
	cpu = raw_smp_processor_id();
	if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
		smp_mb();  /* Subsequent counter accesses must see new value */
		per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
		smp_mb();  /* Subsequent RCU read-side critical sections */
			   /*  seen -after- acknowledgement. */
	}
}
#ifdef CONFIG_NO_HZ
static DEFINE_PER_CPU(int, rcu_update_flag);

/**
 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
 *
 * If the CPU was idle with dynamic ticks active, this updates the
 * rcu_dyntick_sched.dynticks to let the RCU handling know that the
 * CPU is active.
 */
void rcu_irq_enter(void)
{
	int cpu = smp_processor_id();
	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);

	if (per_cpu(rcu_update_flag, cpu))
		per_cpu(rcu_update_flag, cpu)++;

	/*
	 * Only update if we are coming from a stopped ticks mode
	 * (rcu_dyntick_sched.dynticks is even).
	 */
	if (!in_interrupt() &&
	    (rdssp->dynticks & 0x1) == 0) {
		/*
		 * The following might seem like we could have a race
		 * with NMI/SMIs. But this really isn't a problem.
		 * Here we do a read/modify/write, and the race happens
		 * when an NMI/SMI comes in after the read and before
		 * the write. But NMI/SMIs will increment this counter
		 * twice before returning, so the zero bit will not
		 * be corrupted by the NMI/SMI which is the most important
		 * part.
		 *
		 * The only thing is that we would bring back the counter
		 * to a position that it was in during the NMI/SMI.
		 * But the zero bit would be set, so the rest of the
		 * counter would again be ignored.
		 *
		 * On return from the IRQ, the counter may have the zero
		 * bit be 0 and the counter the same as the return from
		 * the NMI/SMI. If the state machine was so unlucky to
		 * see that, it still doesn't matter, since all
		 * RCU read-side critical sections on this CPU would
		 * have already completed.
		 */
		rdssp->dynticks++;
		/*
		 * The following memory barrier ensures that any
		 * rcu_read_lock() primitives in the irq handler
		 * are seen by other CPUs to follow the above
		 * increment to rcu_dyntick_sched.dynticks. This is
		 * required in order for other CPUs to correctly
		 * determine when it is safe to advance the RCU
		 * grace-period state machine.
		 */
		smp_mb(); /* see above block comment. */
		/*
		 * Since we can't determine the dynamic tick mode from
		 * the rcu_dyntick_sched.dynticks after this routine,
		 * we use a second flag to acknowledge that we came
		 * from an idle state with ticks stopped.
		 */
		per_cpu(rcu_update_flag, cpu)++;
		/*
		 * If we take an NMI/SMI now, they will also increment
		 * the rcu_update_flag, and will not update the
		 * rcu_dyntick_sched.dynticks on exit. That is for
		 * this IRQ to do.
		 */
	}
}
/**
 * rcu_irq_exit - Called from exiting Hard irq context.
 *
 * If the CPU was idle with dynamic ticks active, update the
 * rcu_dyntick_sched.dynticks to let the RCU handling be
 * aware that the CPU is going back to idle with no ticks.
 */
void rcu_irq_exit(void)
{
	int cpu = smp_processor_id();
	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);

	/*
	 * rcu_update_flag is set if we interrupted the CPU
	 * when it was idle with ticks stopped.
	 * Once this occurs, we keep track of interrupt nesting
	 * because a NMI/SMI could also come in, and we still
	 * only want the IRQ that started the increment of the
	 * rcu_dyntick_sched.dynticks to be the one that modifies
	 * it on exit.
	 */
	if (per_cpu(rcu_update_flag, cpu)) {
		if (--per_cpu(rcu_update_flag, cpu))
			return;

		/* This must match the interrupt nesting */
		WARN_ON(in_interrupt());

		/*
		 * If an NMI/SMI happens now we are still
		 * protected by the rcu_dyntick_sched.dynticks being odd.
		 */

		/*
		 * The following memory barrier ensures that any
		 * rcu_read_unlock() primitives in the irq handler
		 * are seen by other CPUs to precede the following
		 * increment to rcu_dyntick_sched.dynticks. This
		 * is required in order for other CPUs to determine
		 * when it is safe to advance the RCU grace-period
		 * state machine.
		 */
		smp_mb(); /* see above block comment. */
		rdssp->dynticks++;
		WARN_ON(rdssp->dynticks & 0x1);
	}
}

void rcu_nmi_enter(void)
{
	rcu_irq_enter();
}

void rcu_nmi_exit(void)
{
	rcu_irq_exit();
}
static void dyntick_save_progress_counter(int cpu)
{
	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);

	rdssp->dynticks_snap = rdssp->dynticks;
}

static inline int
rcu_try_flip_waitack_needed(int cpu)
{
	long curr;
	long snap;
	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);

	curr = rdssp->dynticks;
	snap = rdssp->dynticks_snap;
	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */

	/*
	 * If the CPU remained in dynticks mode for the entire time
	 * and didn't take any interrupts, NMIs, SMIs, or whatever,
	 * then it cannot be in the middle of an rcu_read_lock(), so
	 * the next rcu_read_lock() it executes must use the new value
	 * of the counter.  So we can safely pretend that this CPU
	 * already acknowledged the counter.
	 */
	if ((curr == snap) && ((curr & 0x1) == 0))
		return 0;

	/*
	 * If the CPU passed through or entered a dynticks idle phase with
	 * no active irq handlers, then, as above, we can safely pretend
	 * that this CPU already acknowledged the counter.
	 */
	if ((curr - snap) > 2 || (curr & 0x1) == 0)
		return 0;

	/* We need this CPU to explicitly acknowledge the counter flip. */
	return 1;
}

static inline int
rcu_try_flip_waitmb_needed(int cpu)
{
	long curr;
	long snap;
	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);

	curr = rdssp->dynticks;
	snap = rdssp->dynticks_snap;
	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */

	/*
	 * If the CPU remained in dynticks mode for the entire time
	 * and didn't take any interrupts, NMIs, SMIs, or whatever,
	 * then it cannot have executed an RCU read-side critical section
	 * during that time, so there is no need for it to execute a
	 * memory barrier.
	 */
	if ((curr == snap) && ((curr & 0x1) == 0))
		return 0;

	/*
	 * If the CPU either entered or exited an outermost interrupt,
	 * SMI, NMI, or whatever handler, then we know that it executed
	 * a memory barrier when doing so.  So we don't need another one.
	 */
	if (curr != snap)
		return 0;

	/* We need the CPU to execute a memory barrier. */
	return 1;
}

static void dyntick_save_progress_counter_sched(int cpu)
{
	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);

	rdssp->sched_dynticks_snap = rdssp->dynticks;
}

static int rcu_qsctr_inc_needed_dyntick(int cpu)
{
	long curr;
	long snap;
	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);

	curr = rdssp->dynticks;
	snap = rdssp->sched_dynticks_snap;
	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */

	/*
	 * If the CPU remained in dynticks mode for the entire time
	 * and didn't take any interrupts, NMIs, SMIs, or whatever,
	 * then it cannot be in the middle of an rcu_read_lock(), so
	 * the next rcu_read_lock() it executes must use the new value
	 * of the counter.  Therefore, this CPU has been in a quiescent
	 * state the entire time, and we don't need to wait for it.
	 */
	if ((curr == snap) && ((curr & 0x1) == 0))
		return 0;

	/*
	 * If the CPU passed through or entered a dynticks idle phase with
	 * no active irq handlers, then, as above, this CPU has already
	 * passed through a quiescent state.
	 */
	if ((curr - snap) > 2 || (snap & 0x1) == 0)
		return 0;

	/* We need this CPU to go through a quiescent state. */
	return 1;
}

#else /* !CONFIG_NO_HZ */

# define dyntick_save_progress_counter(cpu)		do { } while (0)
# define rcu_try_flip_waitack_needed(cpu)		(1)
# define rcu_try_flip_waitmb_needed(cpu)		(1)

# define dyntick_save_progress_counter_sched(cpu)	do { } while (0)
# define rcu_qsctr_inc_needed_dyntick(cpu)		(1)

#endif /* CONFIG_NO_HZ */
static void save_qsctr_sched(int cpu)
{
	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);

	rdssp->sched_qs_snap = rdssp->sched_qs;
}

static inline int rcu_qsctr_inc_needed(int cpu)
{
	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);

	/*
	 * If there has been a quiescent state, no more need to wait
	 * on this CPU.
	 */
	if (rdssp->sched_qs != rdssp->sched_qs_snap) {
		smp_mb(); /* force ordering with cpu entering schedule(). */
		return 0;
	}

	/* We need this CPU to go through a quiescent state. */
	return 1;
}

/*
 * Get here when RCU is idle.  Decide whether we need to
 * move out of idle state, and return non-zero if so.
 * "Straightforward" approach for the moment, might later
 * use callback-list lengths, grace-period duration, or
 * some such to determine when to exit idle state.
 * Might also need a pre-idle test that does not acquire
 * the lock, but let's get the simple case working first...
 */
static int
rcu_try_flip_idle(void)
{
	int cpu;

	RCU_TRACE_ME(rcupreempt_trace_try_flip_i1);
	if (!rcu_pending(smp_processor_id())) {
		RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1);
		return 0;
	}

	/*
	 * Do the flip.
	 */
	RCU_TRACE_ME(rcupreempt_trace_try_flip_g1);
	rcu_ctrlblk.completed++;  /* stands in for rcu_try_flip_g2 */

	/*
	 * Need a memory barrier so that other CPUs see the new
	 * counter value before they see the subsequent change of all
	 * the rcu_flip_flag instances to rcu_flipped.
	 */
	smp_mb();	/* see above block comment. */

	/* Now ask each CPU for acknowledgement of the flip. */
	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) {
		per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
		dyntick_save_progress_counter(cpu);
	}

	return 1;
}
/*
 * Wait for CPUs to acknowledge the flip.
 */
static int
rcu_try_flip_waitack(void)
{
	int cpu;

	RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map))
		if (rcu_try_flip_waitack_needed(cpu) &&
		    per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
			RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
			return 0;
		}

	/*
	 * Make sure our checks above don't bleed into subsequent
	 * waiting for the sum of the counters to reach zero.
	 */
	smp_mb();	/* see above block comment. */
	RCU_TRACE_ME(rcupreempt_trace_try_flip_a2);
	return 1;
}

/*
 * Wait for collective ``last'' counter to reach zero,
 * then tell all CPUs to do an end-of-grace-period memory barrier.
 */
static int
rcu_try_flip_waitzero(void)
{
	int cpu;
	int lastidx = !(rcu_ctrlblk.completed & 0x1);
	int sum = 0;

	/* Check to see if the sum of the "last" counters is zero. */
	RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map))
		sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
	if (sum != 0) {
		RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
		return 0;
	}

	/*
	 * This ensures that the other CPUs see the call for
	 * memory barriers -after- the sum to zero has been
	 * detected here
	 */
	smp_mb();  /*  ^^^^^^^^^^^^ */

	/* Call for a memory barrier from each CPU. */
	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) {
		per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
		dyntick_save_progress_counter(cpu);
	}

	RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
	return 1;
}
/*
 * Wait for all CPUs to do their end-of-grace-period memory barrier.
 * Return non-zero once all CPUs have done so.
 */
static int
rcu_try_flip_waitmb(void)
{
	int cpu;

	RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
	for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map))
		if (rcu_try_flip_waitmb_needed(cpu) &&
		    per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
			RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
			return 0;
		}

	smp_mb(); /* Ensure that the above checks precede any following flip. */
	RCU_TRACE_ME(rcupreempt_trace_try_flip_m2);
	return 1;
}

/*
 * Attempt a single flip of the counters.  Remember, a single flip does
 * -not- constitute a grace period.  Instead, the interval between
 * at least GP_STAGES consecutive flips is a grace period.
 *
 * If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation
 * on a large SMP, they might want to use a hierarchical organization of
 * the per-CPU-counter pairs.
 */
static void rcu_try_flip(void)
{
	unsigned long flags;

	RCU_TRACE_ME(rcupreempt_trace_try_flip_1);
	if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) {
		RCU_TRACE_ME(rcupreempt_trace_try_flip_e1);
		return;
	}

	/*
	 * Take the next transition(s) through the RCU grace-period
	 * flip-counter state machine.
	 */
	switch (rcu_ctrlblk.rcu_try_flip_state) {
	case rcu_try_flip_idle_state:
		if (rcu_try_flip_idle())
			rcu_ctrlblk.rcu_try_flip_state =
				rcu_try_flip_waitack_state;
		break;
	case rcu_try_flip_waitack_state:
		if (rcu_try_flip_waitack())
			rcu_ctrlblk.rcu_try_flip_state =
				rcu_try_flip_waitzero_state;
		break;
	case rcu_try_flip_waitzero_state:
		if (rcu_try_flip_waitzero())
			rcu_ctrlblk.rcu_try_flip_state =
				rcu_try_flip_waitmb_state;
		break;
	case rcu_try_flip_waitmb_state:
		if (rcu_try_flip_waitmb())
			rcu_ctrlblk.rcu_try_flip_state =
				rcu_try_flip_idle_state;
	}
	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
}
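
/*
 * In terms of the one-letter names used in the rcu_try_flip_states
 * comments above, the state machine advanced by rcu_try_flip() cycles
 *
 *	I --(flip, request acks)--> A --(all acks seen)--> Z
 *	  --(old counters sum to zero)--> M --(all mb()s done)--> I
 *
 * with rcu_ctrlblk.completed incremented once per full cycle, when
 * leaving the idle state.
 */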
/*
 * Check to see if this CPU needs to do a memory barrier in order to
 * ensure that any prior RCU read-side critical sections have committed
 * their counter manipulations and critical-section memory references
 * before declaring the grace period to be completed.
 */
static void rcu_check_mb(int cpu)
{
	if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) {
		smp_mb();  /* Ensure RCU read-side accesses are visible. */
		per_cpu(rcu_mb_flag, cpu) = rcu_mb_done;
	}
}

void rcu_check_callbacks(int cpu, int user)
{
	unsigned long flags;
	struct rcu_data *rdp = RCU_DATA_CPU(cpu);

	/*
	 * If this CPU took its interrupt from user mode or from the
	 * idle loop, and this is not a nested interrupt, then
	 * this CPU has to have exited all prior preempt-disable
	 * sections of code.  So increment the counter to note this.
	 *
	 * The memory barrier is needed to handle the case where
	 * writes from a preempt-disable section of code get reordered
	 * into schedule() by this CPU's write buffer.  So the memory
	 * barrier makes sure that the rcu_qsctr_inc() is seen by other
	 * CPUs to happen after any such write.
	 */
	if (user ||
	    (idle_cpu(cpu) && !in_softirq() &&
	     hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
		smp_mb();	/* Guard against aggressive schedule(). */
		rcu_qsctr_inc(cpu);
	}

	rcu_check_mb(cpu);
	if (rcu_ctrlblk.completed == rdp->completed)
		rcu_try_flip();
	spin_lock_irqsave(&rdp->lock, flags);
	RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
	__rcu_advance_callbacks(rdp);
	if (rdp->donelist == NULL) {
		spin_unlock_irqrestore(&rdp->lock, flags);
	} else {
		spin_unlock_irqrestore(&rdp->lock, flags);
		raise_softirq(RCU_SOFTIRQ);
	}
}
/*
 * Needed by dynticks, to make sure all RCU processing has finished
 * when we go idle:
 */
void rcu_advance_callbacks(int cpu, int user)
{
	unsigned long flags;
	struct rcu_data *rdp = RCU_DATA_CPU(cpu);

	if (rcu_ctrlblk.completed == rdp->completed) {
		rcu_try_flip();
		if (rcu_ctrlblk.completed == rdp->completed)
			return;
	}
	spin_lock_irqsave(&rdp->lock, flags);
	RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
	__rcu_advance_callbacks(rdp);
	spin_unlock_irqrestore(&rdp->lock, flags);
}

#ifdef CONFIG_HOTPLUG_CPU
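
/*
 * Splice srclist onto the end of the destination list (whose tail pointer
 * is dsttail), advance dsttail accordingly, and leave the source list
 * empty with its tail pointer reset to the list header.  Both tail
 * arguments reference the ->next field of the last element, so each
 * splice is O(1).
 */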
#define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) do { \
		*dsttail = srclist; \
		if (srclist != NULL) { \
			dsttail = srctail; \
			srclist = NULL; \
			srctail = &srclist;\
		} \
	} while (0)

void rcu_offline_cpu(int cpu)
{
	int i;
	struct rcu_head *list = NULL;
	unsigned long flags;
	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
	struct rcu_head *schedlist = NULL;
	struct rcu_head **schedtail = &schedlist;
	struct rcu_head **tail = &list;

	/*
	 * Remove all callbacks from the newly dead CPU, retaining order.
	 * Otherwise rcu_barrier() will fail
	 */
	spin_lock_irqsave(&rdp->lock, flags);
	rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail);
	for (i = GP_STAGES - 1; i >= 0; i--)
		rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
					list, tail);
	rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
	rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
				schedlist, schedtail);
	rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
				schedlist, schedtail);
	rdp->rcu_sched_sleeping = 0;
	spin_unlock_irqrestore(&rdp->lock, flags);
	rdp->waitlistcount = 0;

	/* Disengage the newly dead CPU from the grace-period computation. */
	spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
	rcu_check_mb(cpu);
	if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
		smp_mb();  /* Subsequent counter accesses must see new value */
		per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
		smp_mb();  /* Subsequent RCU read-side critical sections */
			   /*  seen -after- acknowledgement. */
	}

	RCU_DATA_ME()->rcu_flipctr[0] += RCU_DATA_CPU(cpu)->rcu_flipctr[0];
	RCU_DATA_ME()->rcu_flipctr[1] += RCU_DATA_CPU(cpu)->rcu_flipctr[1];

	RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0;
	RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0;

	cpumask_clear_cpu(cpu, to_cpumask(rcu_cpu_online_map));

	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);

	/*
	 * Place the removed callbacks on the current CPU's queue.
	 * Make them all start a new grace period: simple approach,
	 * in theory could starve a given set of callbacks, but
	 * you would need to be doing some serious CPU hotplugging
	 * to make this happen.  If this becomes a problem, adding
	 * a synchronize_rcu() to the hotplug path would be a simple
	 * fix.
	 */
	local_irq_save(flags);  /* disable preempt till we know what lock. */
	rdp = RCU_DATA_ME();
	spin_lock(&rdp->lock);
	*rdp->nexttail = list;
	if (list)
		rdp->nexttail = tail;
	*rdp->nextschedtail = schedlist;
	if (schedlist)
		rdp->nextschedtail = schedtail;
	spin_unlock_irqrestore(&rdp->lock, flags);
}

#else /* #ifdef CONFIG_HOTPLUG_CPU */

void rcu_offline_cpu(int cpu)
{
}

#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
void __cpuinit rcu_online_cpu(int cpu)
{
	unsigned long flags;
	struct rcu_data *rdp;

	spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
	cpumask_set_cpu(cpu, to_cpumask(rcu_cpu_online_map));
	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);

	/*
	 * The rcu_sched grace-period processing might have bypassed
	 * this CPU, given that it was not in the rcu_cpu_online_map
	 * when the grace-period scan started.  This means that the
	 * grace-period task might sleep.  So make sure that if this
	 * should happen, the first callback posted to this CPU will
	 * wake up the grace-period task if need be.
	 */
	rdp = RCU_DATA_CPU(cpu);
	spin_lock_irqsave(&rdp->lock, flags);
	rdp->rcu_sched_sleeping = 1;
	spin_unlock_irqrestore(&rdp->lock, flags);
}

static void rcu_process_callbacks(struct softirq_action *unused)
{
	unsigned long flags;
	struct rcu_head *next, *list;
	struct rcu_data *rdp;

	local_irq_save(flags);
	rdp = RCU_DATA_ME();
	spin_lock(&rdp->lock);
	list = rdp->donelist;
	if (list == NULL) {
		spin_unlock_irqrestore(&rdp->lock, flags);
		return;
	}
	rdp->donelist = NULL;
	rdp->donetail = &rdp->donelist;
	RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp);
	spin_unlock_irqrestore(&rdp->lock, flags);
	while (list) {
		next = list->next;
		list->func(list);
		list = next;
		RCU_TRACE_ME(rcupreempt_trace_invoke);
	}
}

void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	unsigned long flags;
	struct rcu_data *rdp;

	head->func = func;
	head->next = NULL;
	local_irq_save(flags);
	rdp = RCU_DATA_ME();
	spin_lock(&rdp->lock);
	__rcu_advance_callbacks(rdp);
	*rdp->nexttail = head;
	rdp->nexttail = &head->next;
	RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
	spin_unlock_irqrestore(&rdp->lock, flags);
}
EXPORT_SYMBOL_GPL(call_rcu);
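
/*
 * Typical update-side usage of call_rcu(): embed a struct rcu_head in the
 * protected object and free the object from the callback once a grace
 * period has elapsed.  A minimal sketch ("struct foo", "gp", and
 * foo_reclaim() are illustrative names, not part of this file):
 *
 *	struct foo {
 *		int data;
 *		struct rcu_head rcu;
 *	};
 *
 *	static void foo_reclaim(struct rcu_head *head)
 *	{
 *		kfree(container_of(head, struct foo, rcu));
 *	}
 *
 *	old = gp;
 *	rcu_assign_pointer(gp, new);
 *	call_rcu(&old->rcu, foo_reclaim);
 *
 * The callback runs from rcu_process_callbacks() above, in softirq
 * context, so it must not block.
 */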
void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	unsigned long flags;
	struct rcu_data *rdp;
	int wake_gp = 0;

	head->func = func;
	head->next = NULL;
	local_irq_save(flags);
	rdp = RCU_DATA_ME();
	spin_lock(&rdp->lock);
	*rdp->nextschedtail = head;
	rdp->nextschedtail = &head->next;
	if (rdp->rcu_sched_sleeping) {

		/* Grace-period processing might be sleeping... */

		rdp->rcu_sched_sleeping = 0;
		wake_gp = 1;
	}
	spin_unlock_irqrestore(&rdp->lock, flags);
	if (wake_gp) {

		/* Wake up grace-period processing, unless someone beat us. */

		spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
		if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
			wake_gp = 0;
		rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
		spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
		if (wake_gp)
			wake_up_interruptible(&rcu_ctrlblk.sched_wq);
	}
}
EXPORT_SYMBOL_GPL(call_rcu_sched);

/*
 * Wait until all currently running preempt_disable() code segments
 * (including hardware-irq-disable segments) complete.  Note that
 * in -rt this does -not- necessarily result in all currently executing
 * interrupt -handlers- having completed.
 */
void __synchronize_sched(void)
{
	struct rcu_synchronize rcu;

	if (num_online_cpus() == 1)
		return;  /* blocking is gp if only one CPU! */

	init_completion(&rcu.completion);
	/* Will wake me after RCU finished. */
	call_rcu_sched(&rcu.head, wakeme_after_rcu);
	/* Wait for it. */
	wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(__synchronize_sched);
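
/*
 * Callers normally reach __synchronize_sched() through the
 * synchronize_sched() wrapper.  A minimal sketch of the usual pattern
 * (the global pointer "gp" and the kfree() of "old" are illustrative
 * only):
 *
 *	old = gp;
 *	rcu_assign_pointer(gp, new);
 *	synchronize_sched();
 *	kfree(old);
 *
 * Because it blocks until all pre-existing preempt-disabled regions have
 * completed, it must not be called with preemption or interrupts
 * disabled.
 */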
/*
 * kthread function that manages call_rcu_sched grace periods.
 */
static int rcu_sched_grace_period(void *arg)
{
	int couldsleep;		/* might sleep after current pass. */
	int couldsleepnext = 0;	/* might sleep after next pass. */
	int cpu;
	unsigned long flags;
	struct rcu_data *rdp;
	int ret;

	/*
	 * Each pass through the following loop handles one
	 * rcu_sched grace period cycle.
	 */
	do {
		/* Save each CPU's current state. */
		for_each_online_cpu(cpu) {
			dyntick_save_progress_counter_sched(cpu);
			save_qsctr_sched(cpu);
		}

		/*
		 * Sleep for about an RCU grace-period's worth to
		 * allow better batching and to consume less CPU.
		 */
		schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);

		/*
		 * If there was nothing to do last time, prepare to
		 * sleep at the end of the current grace period cycle.
		 */
		couldsleep = couldsleepnext;
		couldsleepnext = 1;
		if (couldsleep) {
			spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
			rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
			spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
		}

		/*
		 * Wait on each CPU in turn to have either visited
		 * a quiescent state or been in dynticks-idle mode.
		 */
		for_each_online_cpu(cpu) {
			while (rcu_qsctr_inc_needed(cpu) &&
			       rcu_qsctr_inc_needed_dyntick(cpu)) {
				/* resched_cpu(cpu); @@@ */
				schedule_timeout_interruptible(1);
			}
		}

		/* Advance callbacks for each CPU.  */
		for_each_online_cpu(cpu) {
			rdp = RCU_DATA_CPU(cpu);
			spin_lock_irqsave(&rdp->lock, flags);

			/*
			 * We are running on this CPU irq-disabled, so no
			 * CPU can go offline until we re-enable irqs.
			 * The current CPU might have already gone
			 * offline (between the for_each_online_cpu and
			 * the spin_lock_irqsave), but in that case all its
			 * callback lists will be empty, so no harm done.
			 *
			 * Advance the callbacks!  We share normal RCU's
			 * donelist, since callbacks are invoked the
			 * same way in either case.
			 */
			if (rdp->waitschedlist != NULL) {
				*rdp->donetail = rdp->waitschedlist;
				rdp->donetail = rdp->waitschedtail;

				/*
				 * Next rcu_check_callbacks() will
				 * do the required raise_softirq().
				 */
			}
			if (rdp->nextschedlist != NULL) {
				rdp->waitschedlist = rdp->nextschedlist;
				rdp->waitschedtail = rdp->nextschedtail;
				couldsleep = 0;
				couldsleepnext = 0;
			} else {
				rdp->waitschedlist = NULL;
				rdp->waitschedtail = &rdp->waitschedlist;
			}
			rdp->nextschedlist = NULL;
			rdp->nextschedtail = &rdp->nextschedlist;

			/* Mark sleep intention. */

			rdp->rcu_sched_sleeping = couldsleep;

			spin_unlock_irqrestore(&rdp->lock, flags);
		}

		/* If we saw callbacks on the last scan, go deal with them. */

		if (!couldsleep)
			continue;

		/* Attempt to block... */

		spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
		if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {

			/*
			 * Someone posted a callback after we scanned.
			 * Go take care of it.
			 */
			spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
			couldsleepnext = 0;
			continue;
		}

		/* Block until the next person posts a callback. */

		rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
		spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
		ret = 0;
		__wait_event_interruptible(rcu_ctrlblk.sched_wq,
			rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
			ret);

		/*
		 * Signals would prevent us from sleeping, and we cannot
		 * do much with them in any case.  So flush them.
		 */
		if (ret)
			flush_signals(current);
		couldsleepnext = 0;

	} while (!kthread_should_stop());

	return (0);
}
/*
 * Check to see if any future RCU-related work will need to be done
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so.  Assumes that notifiers would take care of handling any
 * outstanding requests from the RCU core.
 *
 * This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 */
int rcu_needs_cpu(int cpu)
{
	struct rcu_data *rdp = RCU_DATA_CPU(cpu);

	return (rdp->donelist != NULL ||
		!!rdp->waitlistcount ||
		rdp->nextlist != NULL ||
		rdp->nextschedlist != NULL ||
		rdp->waitschedlist != NULL);
}

int rcu_pending(int cpu)
{
	struct rcu_data *rdp = RCU_DATA_CPU(cpu);

	/* The CPU has at least one callback queued somewhere. */
	if (rdp->donelist != NULL ||
	    !!rdp->waitlistcount ||
	    rdp->nextlist != NULL ||
	    rdp->nextschedlist != NULL ||
	    rdp->waitschedlist != NULL)
		return 1;

	/* The RCU core needs an acknowledgement from this CPU. */
	if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) ||
	    (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed))
		return 1;

	/* This CPU has fallen behind the global grace-period number. */
	if (rdp->completed != rcu_ctrlblk.completed)
		return 1;

	/* Nothing needed from this CPU. */
	return 0;
}

static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		rcu_online_cpu(cpu);
		break;
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		rcu_offline_cpu(cpu);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata rcu_nb = {
	.notifier_call = rcu_cpu_notify,
};

void __init __rcu_init(void)
{
	int cpu;
	int i;
	struct rcu_data *rdp;

	printk(KERN_NOTICE "Preemptible RCU implementation.\n");
	for_each_possible_cpu(cpu) {
		rdp = RCU_DATA_CPU(cpu);
		spin_lock_init(&rdp->lock);
		rdp->completed = 0;
		rdp->waitlistcount = 0;
		rdp->nextlist = NULL;
		rdp->nexttail = &rdp->nextlist;
		for (i = 0; i < GP_STAGES; i++) {
			rdp->waitlist[i] = NULL;
			rdp->waittail[i] = &rdp->waitlist[i];
		}
		rdp->donelist = NULL;
		rdp->donetail = &rdp->donelist;
		rdp->rcu_flipctr[0] = 0;
		rdp->rcu_flipctr[1] = 0;
		rdp->nextschedlist = NULL;
		rdp->nextschedtail = &rdp->nextschedlist;
		rdp->waitschedlist = NULL;
		rdp->waitschedtail = &rdp->waitschedlist;
		rdp->rcu_sched_sleeping = 0;
	}
	register_cpu_notifier(&rcu_nb);

	/*
	 * We don't need protection against CPU-Hotplug here since
	 *
	 * a) If a CPU comes online while we are iterating over the
	 *    cpu_online_mask below, we would only end up making a
	 *    duplicate call to rcu_online_cpu() which sets the corresponding
	 *    CPU's mask in the rcu_cpu_online_map.
	 *
	 * b) A CPU cannot go offline at this point in time since the user
	 *    does not have access to the sysfs interface, nor do we
	 *    suspend the system.
	 */
	for_each_online_cpu(cpu)
		rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long) cpu);

	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
}

/*
 * Late-boot-time RCU initialization that must wait until after scheduler
 * has been initialized.
 */
void __init rcu_init_sched(void)
{
	rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
						  NULL,
						  "rcu_sched_grace_period");
	WARN_ON(IS_ERR(rcu_sched_grace_period_task));
}

#ifdef CONFIG_RCU_TRACE
long *rcupreempt_flipctr(int cpu)
{
	return &RCU_DATA_CPU(cpu)->rcu_flipctr[0];
}
EXPORT_SYMBOL_GPL(rcupreempt_flipctr);

int rcupreempt_flip_flag(int cpu)
{
	return per_cpu(rcu_flip_flag, cpu);
}
EXPORT_SYMBOL_GPL(rcupreempt_flip_flag);

int rcupreempt_mb_flag(int cpu)
{
	return per_cpu(rcu_mb_flag, cpu);
}
EXPORT_SYMBOL_GPL(rcupreempt_mb_flag);

char *rcupreempt_try_flip_state_name(void)
{
	return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state];
}
EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name);

struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
{
	struct rcu_data *rdp = RCU_DATA_CPU(cpu);

	return &rdp->trace;
}
EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu);

#endif /* #ifdef CONFIG_RCU_TRACE */