rcutree_plugin.h 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440
  1. /*
  2. * Read-Copy Update mechanism for mutual exclusion (tree-based version)
  3. * Internal non-public definitions that provide either classic
  4. * or preemptible semantics.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, write to the Free Software
  18. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  19. *
  20. * Copyright Red Hat, 2009
  21. * Copyright IBM Corporation, 2009
  22. *
  23. * Author: Ingo Molnar <mingo@elte.hu>
  24. * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  25. */
  26. #include <linux/delay.h>
  27. #include <linux/gfp.h>
  28. #include <linux/oom.h>
  29. #include <linux/smpboot.h>
  /* Priority value used for RCU kthreads when nothing else is configured. */
  #define RCU_KTHREAD_PRIO 1

  /*
   * RCU_BOOST_PRIO is the Kconfig-supplied CONFIG_RCU_BOOST_PRIO when
   * CONFIG_RCU_BOOST is set; otherwise it falls back to RCU_KTHREAD_PRIO.
   */
  #ifndef CONFIG_RCU_BOOST
  #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
  #else
  #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
  #endif
  #ifdef CONFIG_RCU_NOCB_CPU
  /* Boot-time scratch buffer that receives the formatted no-CBs CPU list. */
  static char __initdata nocb_buf[NR_CPUS * 5];

  /* CPUs whose callbacks are offloaded. */
  static cpumask_var_t rcu_nocb_mask;

  /* Set once rcu_nocb_mask has been allocated. */
  static bool have_rcu_nocb_mask;

  /* When set, the offload kthreads are to poll. */
  static bool __read_mostly rcu_nocb_poll;
  #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
  42. /*
  43. * Check the RCU kernel configuration parameters and print informative
  44. * messages about anything out of the ordinary. If you like #ifdef, you
  45. * will love this function.
  46. */
  47. static void __init rcu_bootup_announce_oddness(void)
  48. {
  49. #ifdef CONFIG_RCU_TRACE
  50. printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n");
  51. #endif
  52. #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
  53. printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
  54. CONFIG_RCU_FANOUT);
  55. #endif
  56. #ifdef CONFIG_RCU_FANOUT_EXACT
  57. printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n");
  58. #endif
  59. #ifdef CONFIG_RCU_FAST_NO_HZ
  60. printk(KERN_INFO
  61. "\tRCU dyntick-idle grace-period acceleration is enabled.\n");
  62. #endif
  63. #ifdef CONFIG_PROVE_RCU
  64. printk(KERN_INFO "\tRCU lockdep checking is enabled.\n");
  65. #endif
  66. #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
  67. printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
  68. #endif
  69. #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
  70. printk(KERN_INFO "\tDump stacks of tasks blocking RCU-preempt GP.\n");
  71. #endif
  72. #if defined(CONFIG_RCU_CPU_STALL_INFO)
  73. printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
  74. #endif
  75. #if NUM_RCU_LVL_4 != 0
  76. printk(KERN_INFO "\tFour-level hierarchy is enabled.\n");
  77. #endif
  78. if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
  79. printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
  80. if (nr_cpu_ids != NR_CPUS)
  81. printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
  82. #ifdef CONFIG_RCU_NOCB_CPU
  83. if (have_rcu_nocb_mask) {
  84. cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
  85. pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf);
  86. if (rcu_nocb_poll)
  87. pr_info("\tExperimental polled no-CBs CPUs.\n");
  88. }
  89. #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
  90. }
  91. #ifdef CONFIG_TREE_PREEMPT_RCU
  92. struct rcu_state rcu_preempt_state =
  93. RCU_STATE_INITIALIZER(rcu_preempt, call_rcu);
  94. DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
  95. static struct rcu_state *rcu_state = &rcu_preempt_state;
  96. static int rcu_preempted_readers_exp(struct rcu_node *rnp);
  97. /*
  98. * Tell them what RCU they are running.
  99. */
  100. static void __init rcu_bootup_announce(void)
  101. {
  102. printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n");
  103. rcu_bootup_announce_oddness();
  104. }
  105. /*
  106. * Return the number of RCU-preempt batches processed thus far
  107. * for debug and statistics.
  108. */
  109. long rcu_batches_completed_preempt(void)
  110. {
  111. return rcu_preempt_state.completed;
  112. }
  113. EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
  /*
   * Debug/statistics helper: number of RCU batches processed so far.
   * In this configuration that is the RCU-preempt count.
   */
  long rcu_batches_completed(void)
  {
  	long nr_completed = rcu_batches_completed_preempt();

  	return nr_completed;
  }
  EXPORT_SYMBOL_GPL(rcu_batches_completed);
  122. /*
  123. * Force a quiescent state for preemptible RCU.
  124. */
  125. void rcu_force_quiescent_state(void)
  126. {
  127. force_quiescent_state(&rcu_preempt_state);
  128. }
  129. EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  130. /*
  131. * Record a preemptible-RCU quiescent state for the specified CPU. Note
  132. * that this just means that the task currently running on the CPU is
  133. * not in a quiescent state. There might be any number of tasks blocked
  134. * while in an RCU read-side critical section.
  135. *
  136. * Unlike the other rcu_*_qs() functions, callers to this function
  137. * must disable irqs in order to protect the assignment to
  138. * ->rcu_read_unlock_special.
  139. */
  140. static void rcu_preempt_qs(int cpu)
  141. {
  142. struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
  143. if (rdp->passed_quiesce == 0)
  144. trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs");
  145. rdp->passed_quiesce = 1;
  146. current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
  147. }
  148. /*
  149. * We have entered the scheduler, and the current task might soon be
  150. * context-switched away from. If this task is in an RCU read-side
  151. * critical section, we will no longer be able to rely on the CPU to
  152. * record that fact, so we enqueue the task on the blkd_tasks list.
  153. * The task will dequeue itself when it exits the outermost enclosing
  154. * RCU read-side critical section. Therefore, the current grace period
  155. * cannot be permitted to complete until the blkd_tasks list entries
  156. * predating the current grace period drain, in other words, until
  157. * rnp->gp_tasks becomes NULL.
  158. *
  159. * Caller must disable preemption.
  160. */
  161. static void rcu_preempt_note_context_switch(int cpu)
  162. {
  163. struct task_struct *t = current;
  164. unsigned long flags;
  165. struct rcu_data *rdp;
  166. struct rcu_node *rnp;
  167. if (t->rcu_read_lock_nesting > 0 &&
  168. (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
  169. /* Possibly blocking in an RCU read-side critical section. */
  170. rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
  171. rnp = rdp->mynode;
  172. raw_spin_lock_irqsave(&rnp->lock, flags);
  173. t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
  174. t->rcu_blocked_node = rnp;
  175. /*
  176. * If this CPU has already checked in, then this task
  177. * will hold up the next grace period rather than the
  178. * current grace period. Queue the task accordingly.
  179. * If the task is queued for the current grace period
  180. * (i.e., this CPU has not yet passed through a quiescent
  181. * state for the current grace period), then as long
  182. * as that task remains queued, the current grace period
  183. * cannot end. Note that there is some uncertainty as
  184. * to exactly when the current grace period started.
  185. * We take a conservative approach, which can result
  186. * in unnecessarily waiting on tasks that started very
  187. * slightly after the current grace period began. C'est
  188. * la vie!!!
  189. *
  190. * But first, note that the current CPU must still be
  191. * on line!
  192. */
  193. WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
  194. WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
  195. if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
  196. list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
  197. rnp->gp_tasks = &t->rcu_node_entry;
  198. #ifdef CONFIG_RCU_BOOST
  199. if (rnp->boost_tasks != NULL)
  200. rnp->boost_tasks = rnp->gp_tasks;
  201. #endif /* #ifdef CONFIG_RCU_BOOST */
  202. } else {
  203. list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
  204. if (rnp->qsmask & rdp->grpmask)
  205. rnp->gp_tasks = &t->rcu_node_entry;
  206. }
  207. trace_rcu_preempt_task(rdp->rsp->name,
  208. t->pid,
  209. (rnp->qsmask & rdp->grpmask)
  210. ? rnp->gpnum
  211. : rnp->gpnum + 1);
  212. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  213. } else if (t->rcu_read_lock_nesting < 0 &&
  214. t->rcu_read_unlock_special) {
  215. /*
  216. * Complete exit from RCU read-side critical section on
  217. * behalf of preempted instance of __rcu_read_unlock().
  218. */
  219. rcu_read_unlock_special(t);
  220. }
  221. /*
  222. * Either we were not in an RCU read-side critical section to
  223. * begin with, or we have now recorded that critical section
  224. * globally. Either way, we can now note a quiescent state
  225. * for this CPU. Again, if we were in an RCU read-side critical
  226. * section, and if that critical section was blocking the current
  227. * grace period, then the fact that the task has been enqueued
  228. * means that we continue to block the current grace period.
  229. */
  230. local_irq_save(flags);
  231. rcu_preempt_qs(cpu);
  232. local_irq_restore(flags);
  233. }
  234. /*
  235. * Check for preempted RCU readers blocking the current grace period
  236. * for the specified rcu_node structure. If the caller needs a reliable
  237. * answer, it must hold the rcu_node's ->lock.
  238. */
  239. static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
  240. {
  241. return rnp->gp_tasks != NULL;
  242. }
  243. /*
  244. * Record a quiescent state for all tasks that were previously queued
  245. * on the specified rcu_node structure and that were blocking the current
  246. * RCU grace period. The caller must hold the specified rnp->lock with
  247. * irqs disabled, and this lock is released upon return, but irqs remain
  248. * disabled.
  249. */
  250. static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
  251. __releases(rnp->lock)
  252. {
  253. unsigned long mask;
  254. struct rcu_node *rnp_p;
  255. if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
  256. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  257. return; /* Still need more quiescent states! */
  258. }
  259. rnp_p = rnp->parent;
  260. if (rnp_p == NULL) {
  261. /*
  262. * Either there is only one rcu_node in the tree,
  263. * or tasks were kicked up to root rcu_node due to
  264. * CPUs going offline.
  265. */
  266. rcu_report_qs_rsp(&rcu_preempt_state, flags);
  267. return;
  268. }
  269. /* Report up the rest of the hierarchy. */
  270. mask = rnp->grpmask;
  271. raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
  272. raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */
  273. rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
  274. }
  275. /*
  276. * Advance a ->blkd_tasks-list pointer to the next entry, instead
  277. * returning NULL if at the end of the list.
  278. */
  279. static struct list_head *rcu_next_node_entry(struct task_struct *t,
  280. struct rcu_node *rnp)
  281. {
  282. struct list_head *np;
  283. np = t->rcu_node_entry.next;
  284. if (np == &rnp->blkd_tasks)
  285. np = NULL;
  286. return np;
  287. }
  288. /*
  289. * Handle special cases during rcu_read_unlock(), such as needing to
  290. * notify RCU core processing or task having blocked during the RCU
  291. * read-side critical section.
  292. */
  293. void rcu_read_unlock_special(struct task_struct *t)
  294. {
  295. int empty;
  296. int empty_exp;
  297. int empty_exp_now;
  298. unsigned long flags;
  299. struct list_head *np;
  300. #ifdef CONFIG_RCU_BOOST
  301. struct rt_mutex *rbmp = NULL;
  302. #endif /* #ifdef CONFIG_RCU_BOOST */
  303. struct rcu_node *rnp;
  304. int special;
  305. /* NMI handlers cannot block and cannot safely manipulate state. */
  306. if (in_nmi())
  307. return;
  308. local_irq_save(flags);
  309. /*
  310. * If RCU core is waiting for this CPU to exit critical section,
  311. * let it know that we have done so.
  312. */
  313. special = t->rcu_read_unlock_special;
  314. if (special & RCU_READ_UNLOCK_NEED_QS) {
  315. rcu_preempt_qs(smp_processor_id());
  316. }
  317. /* Hardware IRQ handlers cannot block. */
  318. if (in_irq() || in_serving_softirq()) {
  319. local_irq_restore(flags);
  320. return;
  321. }
  322. /* Clean up if blocked during RCU read-side critical section. */
  323. if (special & RCU_READ_UNLOCK_BLOCKED) {
  324. t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
  325. /*
  326. * Remove this task from the list it blocked on. The
  327. * task can migrate while we acquire the lock, but at
  328. * most one time. So at most two passes through loop.
  329. */
  330. for (;;) {
  331. rnp = t->rcu_blocked_node;
  332. raw_spin_lock(&rnp->lock); /* irqs already disabled. */
  333. if (rnp == t->rcu_blocked_node)
  334. break;
  335. raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
  336. }
  337. empty = !rcu_preempt_blocked_readers_cgp(rnp);
  338. empty_exp = !rcu_preempted_readers_exp(rnp);
  339. smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
  340. np = rcu_next_node_entry(t, rnp);
  341. list_del_init(&t->rcu_node_entry);
  342. t->rcu_blocked_node = NULL;
  343. trace_rcu_unlock_preempted_task("rcu_preempt",
  344. rnp->gpnum, t->pid);
  345. if (&t->rcu_node_entry == rnp->gp_tasks)
  346. rnp->gp_tasks = np;
  347. if (&t->rcu_node_entry == rnp->exp_tasks)
  348. rnp->exp_tasks = np;
  349. #ifdef CONFIG_RCU_BOOST
  350. if (&t->rcu_node_entry == rnp->boost_tasks)
  351. rnp->boost_tasks = np;
  352. /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */
  353. if (t->rcu_boost_mutex) {
  354. rbmp = t->rcu_boost_mutex;
  355. t->rcu_boost_mutex = NULL;
  356. }
  357. #endif /* #ifdef CONFIG_RCU_BOOST */
  358. /*
  359. * If this was the last task on the current list, and if
  360. * we aren't waiting on any CPUs, report the quiescent state.
  361. * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
  362. * so we must take a snapshot of the expedited state.
  363. */
  364. empty_exp_now = !rcu_preempted_readers_exp(rnp);
  365. if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
  366. trace_rcu_quiescent_state_report("preempt_rcu",
  367. rnp->gpnum,
  368. 0, rnp->qsmask,
  369. rnp->level,
  370. rnp->grplo,
  371. rnp->grphi,
  372. !!rnp->gp_tasks);
  373. rcu_report_unblock_qs_rnp(rnp, flags);
  374. } else {
  375. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  376. }
  377. #ifdef CONFIG_RCU_BOOST
  378. /* Unboost if we were boosted. */
  379. if (rbmp)
  380. rt_mutex_unlock(rbmp);
  381. #endif /* #ifdef CONFIG_RCU_BOOST */
  382. /*
  383. * If this was the last task on the expedited lists,
  384. * then we need to report up the rcu_node hierarchy.
  385. */
  386. if (!empty_exp && empty_exp_now)
  387. rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
  388. } else {
  389. local_irq_restore(flags);
  390. }
  391. }
  392. #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
  393. /*
  394. * Dump detailed information for all tasks blocking the current RCU
  395. * grace period on the specified rcu_node structure.
  396. */
  397. static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
  398. {
  399. unsigned long flags;
  400. struct task_struct *t;
  401. raw_spin_lock_irqsave(&rnp->lock, flags);
  402. if (!rcu_preempt_blocked_readers_cgp(rnp)) {
  403. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  404. return;
  405. }
  406. t = list_entry(rnp->gp_tasks,
  407. struct task_struct, rcu_node_entry);
  408. list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
  409. sched_show_task(t);
  410. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  411. }
  412. /*
  413. * Dump detailed information for all tasks blocking the current RCU
  414. * grace period.
  415. */
  416. static void rcu_print_detail_task_stall(struct rcu_state *rsp)
  417. {
  418. struct rcu_node *rnp = rcu_get_root(rsp);
  419. rcu_print_detail_task_stall_rnp(rnp);
  420. rcu_for_each_leaf_node(rsp, rnp)
  421. rcu_print_detail_task_stall_rnp(rnp);
  422. }
  423. #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
  /* Without CONFIG_RCU_CPU_STALL_VERBOSE there is no per-task dump. */
  static void rcu_print_detail_task_stall(struct rcu_state *rsp)
  {
  }
  427. #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
  428. #ifdef CONFIG_RCU_CPU_STALL_INFO
  429. static void rcu_print_task_stall_begin(struct rcu_node *rnp)
  430. {
  431. printk(KERN_ERR "\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
  432. rnp->level, rnp->grplo, rnp->grphi);
  433. }
  434. static void rcu_print_task_stall_end(void)
  435. {
  436. printk(KERN_CONT "\n");
  437. }
  438. #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
  /* Without CONFIG_RCU_CPU_STALL_INFO no per-node header is printed. */
  static void rcu_print_task_stall_begin(struct rcu_node *rnp)
  {
  }
  /* Without CONFIG_RCU_CPU_STALL_INFO there is no line to terminate. */
  static void rcu_print_task_stall_end(void)
  {
  }
  445. #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
  446. /*
  447. * Scan the current list of tasks blocked within RCU read-side critical
  448. * sections, printing out the tid of each.
  449. */
  450. static int rcu_print_task_stall(struct rcu_node *rnp)
  451. {
  452. struct task_struct *t;
  453. int ndetected = 0;
  454. if (!rcu_preempt_blocked_readers_cgp(rnp))
  455. return 0;
  456. rcu_print_task_stall_begin(rnp);
  457. t = list_entry(rnp->gp_tasks,
  458. struct task_struct, rcu_node_entry);
  459. list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
  460. printk(KERN_CONT " P%d", t->pid);
  461. ndetected++;
  462. }
  463. rcu_print_task_stall_end();
  464. return ndetected;
  465. }
  466. /*
  467. * Check that the list of blocked tasks for the newly completed grace
  468. * period is in fact empty. It is a serious bug to complete a grace
  469. * period that still has RCU readers blocked! This function must be
  470. * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
  471. * must be held by the caller.
  472. *
  473. * Also, if there are blocked tasks on the list, they automatically
  474. * block the newly created grace period, so set up ->gp_tasks accordingly.
  475. */
  476. static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
  477. {
  478. WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
  479. if (!list_empty(&rnp->blkd_tasks))
  480. rnp->gp_tasks = rnp->blkd_tasks.next;
  481. WARN_ON_ONCE(rnp->qsmask);
  482. }
  483. #ifdef CONFIG_HOTPLUG_CPU
  484. /*
  485. * Handle tasklist migration for case in which all CPUs covered by the
  486. * specified rcu_node have gone offline. Move them up to the root
  487. * rcu_node. The reason for not just moving them to the immediate
  488. * parent is to remove the need for rcu_read_unlock_special() to
  489. * make more than two attempts to acquire the target rcu_node's lock.
  490. * Returns true if there were tasks blocking the current RCU grace
  491. * period.
  492. *
  493. * Returns 1 if there was previously a task blocking the current grace
  494. * period on the specified rcu_node structure.
  495. *
  496. * The caller must hold rnp->lock with irqs disabled.
  497. */
  498. static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
  499. struct rcu_node *rnp,
  500. struct rcu_data *rdp)
  501. {
  502. struct list_head *lp;
  503. struct list_head *lp_root;
  504. int retval = 0;
  505. struct rcu_node *rnp_root = rcu_get_root(rsp);
  506. struct task_struct *t;
  507. if (rnp == rnp_root) {
  508. WARN_ONCE(1, "Last CPU thought to be offlined?");
  509. return 0; /* Shouldn't happen: at least one CPU online. */
  510. }
  511. /* If we are on an internal node, complain bitterly. */
  512. WARN_ON_ONCE(rnp != rdp->mynode);
  513. /*
  514. * Move tasks up to root rcu_node. Don't try to get fancy for
  515. * this corner-case operation -- just put this node's tasks
  516. * at the head of the root node's list, and update the root node's
  517. * ->gp_tasks and ->exp_tasks pointers to those of this node's,
  518. * if non-NULL. This might result in waiting for more tasks than
  519. * absolutely necessary, but this is a good performance/complexity
  520. * tradeoff.
  521. */
  522. if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
  523. retval |= RCU_OFL_TASKS_NORM_GP;
  524. if (rcu_preempted_readers_exp(rnp))
  525. retval |= RCU_OFL_TASKS_EXP_GP;
  526. lp = &rnp->blkd_tasks;
  527. lp_root = &rnp_root->blkd_tasks;
  528. while (!list_empty(lp)) {
  529. t = list_entry(lp->next, typeof(*t), rcu_node_entry);
  530. raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
  531. list_del(&t->rcu_node_entry);
  532. t->rcu_blocked_node = rnp_root;
  533. list_add(&t->rcu_node_entry, lp_root);
  534. if (&t->rcu_node_entry == rnp->gp_tasks)
  535. rnp_root->gp_tasks = rnp->gp_tasks;
  536. if (&t->rcu_node_entry == rnp->exp_tasks)
  537. rnp_root->exp_tasks = rnp->exp_tasks;
  538. #ifdef CONFIG_RCU_BOOST
  539. if (&t->rcu_node_entry == rnp->boost_tasks)
  540. rnp_root->boost_tasks = rnp->boost_tasks;
  541. #endif /* #ifdef CONFIG_RCU_BOOST */
  542. raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
  543. }
  544. rnp->gp_tasks = NULL;
  545. rnp->exp_tasks = NULL;
  546. #ifdef CONFIG_RCU_BOOST
  547. rnp->boost_tasks = NULL;
  548. /*
  549. * In case root is being boosted and leaf was not. Make sure
  550. * that we boost the tasks blocking the current grace period
  551. * in this case.
  552. */
  553. raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
  554. if (rnp_root->boost_tasks != NULL &&
  555. rnp_root->boost_tasks != rnp_root->gp_tasks &&
  556. rnp_root->boost_tasks != rnp_root->exp_tasks)
  557. rnp_root->boost_tasks = rnp_root->gp_tasks;
  558. raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
  559. #endif /* #ifdef CONFIG_RCU_BOOST */
  560. return retval;
  561. }
  562. #endif /* #ifdef CONFIG_HOTPLUG_CPU */
  563. /*
  564. * Check for a quiescent state from the current CPU. When a task blocks,
  565. * the task is recorded in the corresponding CPU's rcu_node structure,
  566. * which is checked elsewhere.
  567. *
  568. * Caller must disable hard irqs.
  569. */
  570. static void rcu_preempt_check_callbacks(int cpu)
  571. {
  572. struct task_struct *t = current;
  573. if (t->rcu_read_lock_nesting == 0) {
  574. rcu_preempt_qs(cpu);
  575. return;
  576. }
  577. if (t->rcu_read_lock_nesting > 0 &&
  578. per_cpu(rcu_preempt_data, cpu).qs_pending)
  579. t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
  580. }
  581. #ifdef CONFIG_RCU_BOOST
  582. static void rcu_preempt_do_callbacks(void)
  583. {
  584. rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
  585. }
  586. #endif /* #ifdef CONFIG_RCU_BOOST */
/*
 * Queue a preemptible-RCU callback for invocation after a grace period.
 *
 * @head: rcu_head used to queue the callback.
 * @func: function to be invoked on @head after the grace period.
 *
 * Thin wrapper around __call_rcu() for rcu_preempt_state.  The final
 * argument is 0 here, whereas the lazy variant kfree_call_rcu() passes 1.
 */
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	__call_rcu(head, func, &rcu_preempt_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu);
/*
 * Queue an RCU callback for lazy invocation after a grace period.
 * This will likely be later named something like "call_rcu_lazy()",
 * but that change will require some way of tagging the lazy RCU
 * callbacks in the list of pending callbacks.  Until then, this
 * function may only be called from __kfree_rcu().
 *
 * @head: rcu_head used to queue the callback.
 * @func: function to be invoked on @head after the grace period.
 *
 * Differs from call_rcu() only in passing 1 rather than 0 as the final
 * argument to __call_rcu().
 */
void kfree_call_rcu(struct rcu_head *head,
		    void (*func)(struct rcu_head *rcu))
{
	__call_rcu(head, func, &rcu_preempt_state, -1, 1);
}
EXPORT_SYMBOL_GPL(kfree_call_rcu);
  608. /**
  609. * synchronize_rcu - wait until a grace period has elapsed.
  610. *
  611. * Control will return to the caller some time after a full grace
  612. * period has elapsed, in other words after all currently executing RCU
  613. * read-side critical sections have completed. Note, however, that
  614. * upon return from synchronize_rcu(), the caller might well be executing
  615. * concurrently with new RCU read-side critical sections that began while
  616. * synchronize_rcu() was waiting. RCU read-side critical sections are
  617. * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
  618. *
  619. * See the description of synchronize_sched() for more detailed information
  620. * on memory ordering guarantees.
  621. */
  622. void synchronize_rcu(void)
  623. {
  624. rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
  625. !lock_is_held(&rcu_lock_map) &&
  626. !lock_is_held(&rcu_sched_lock_map),
  627. "Illegal synchronize_rcu() in RCU read-side critical section");
  628. if (!rcu_scheduler_active)
  629. return;
  630. if (rcu_expedited)
  631. synchronize_rcu_expedited();
  632. else
  633. wait_rcu_gp(call_rcu);
  634. }
  635. EXPORT_SYMBOL_GPL(synchronize_rcu);
/*
 * State shared by the preemptible-RCU expedited grace-period code:
 *
 * sync_rcu_preempt_exp_wq: wait queue on which synchronize_rcu_expedited()
 *	sleeps, and which rcu_report_exp_rnp() wakes.
 * sync_rcu_preempt_exp_count: counter incremented by
 *	synchronize_rcu_expedited() once its wait completes; snapshotted
 *	on entry to detect that someone else already did the work.
 * sync_rcu_preempt_exp_mutex: mutex acquired with mutex_trylock() by
 *	synchronize_rcu_expedited() around the expedited grace period.
 */
static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
static unsigned long sync_rcu_preempt_exp_count;
static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
  639. /*
  640. * Return non-zero if there are any tasks in RCU read-side critical
  641. * sections blocking the current preemptible-RCU expedited grace period.
  642. * If there is no preemptible-RCU expedited grace period currently in
  643. * progress, returns zero unconditionally.
  644. */
  645. static int rcu_preempted_readers_exp(struct rcu_node *rnp)
  646. {
  647. return rnp->exp_tasks != NULL;
  648. }
  649. /*
  650. * return non-zero if there is no RCU expedited grace period in progress
  651. * for the specified rcu_node structure, in other words, if all CPUs and
  652. * tasks covered by the specified rcu_node structure have done their bit
  653. * for the current expedited grace period. Works only for preemptible
  654. * RCU -- other RCU implementation use other means.
  655. *
  656. * Caller must hold sync_rcu_preempt_exp_mutex.
  657. */
  658. static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
  659. {
  660. return !rcu_preempted_readers_exp(rnp) &&
  661. ACCESS_ONCE(rnp->expmask) == 0;
  662. }
  663. /*
  664. * Report the exit from RCU read-side critical section for the last task
  665. * that queued itself during or before the current expedited preemptible-RCU
  666. * grace period. This event is reported either to the rcu_node structure on
  667. * which the task was queued or to one of that rcu_node structure's ancestors,
  668. * recursively up the tree. (Calm down, calm down, we do the recursion
  669. * iteratively!)
  670. *
  671. * Most callers will set the "wake" flag, but the task initiating the
  672. * expedited grace period need not wake itself.
  673. *
  674. * Caller must hold sync_rcu_preempt_exp_mutex.
  675. */
  676. static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
  677. bool wake)
  678. {
  679. unsigned long flags;
  680. unsigned long mask;
  681. raw_spin_lock_irqsave(&rnp->lock, flags);
  682. for (;;) {
  683. if (!sync_rcu_preempt_exp_done(rnp)) {
  684. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  685. break;
  686. }
  687. if (rnp->parent == NULL) {
  688. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  689. if (wake)
  690. wake_up(&sync_rcu_preempt_exp_wq);
  691. break;
  692. }
  693. mask = rnp->grpmask;
  694. raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
  695. rnp = rnp->parent;
  696. raw_spin_lock(&rnp->lock); /* irqs already disabled */
  697. rnp->expmask &= ~mask;
  698. }
  699. }
  700. /*
  701. * Snapshot the tasks blocking the newly started preemptible-RCU expedited
  702. * grace period for the specified rcu_node structure. If there are no such
  703. * tasks, report it up the rcu_node hierarchy.
  704. *
  705. * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
  706. * CPU hotplug operations.
  707. */
  708. static void
  709. sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
  710. {
  711. unsigned long flags;
  712. int must_wait = 0;
  713. raw_spin_lock_irqsave(&rnp->lock, flags);
  714. if (list_empty(&rnp->blkd_tasks)) {
  715. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  716. } else {
  717. rnp->exp_tasks = rnp->blkd_tasks.next;
  718. rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
  719. must_wait = 1;
  720. }
  721. if (!must_wait)
  722. rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
  723. }
/**
 * synchronize_rcu_expedited - Brute-force RCU grace period
 *
 * Wait for an RCU-preempt grace period, but expedite it.  The basic
 * idea is to invoke synchronize_sched_expedited() to push all the tasks to
 * the ->blkd_tasks lists and wait for this list to drain.  This consumes
 * significant time on all CPUs and is unfriendly to real-time workloads,
 * so is thus not recommended for any sort of common-case code.
 * In fact, if you are using synchronize_rcu_expedited() in a loop,
 * please restructure your code to batch your updates, and then use a
 * single synchronize_rcu() instead.
 *
 * Note that it is illegal to call this function while holding any lock
 * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
 * to call this function from a CPU-hotplug notifier.  Failing to observe
 * these restrictions will result in deadlock.
 */
void synchronize_rcu_expedited(void)
{
	unsigned long flags;
	struct rcu_node *rnp;
	struct rcu_state *rsp = &rcu_preempt_state;
	unsigned long snap;
	int trycount = 0;

	/*
	 * Snapshot the expedited-GP counter (plus one).  If the counter
	 * later moves strictly past this snapshot, the checks below treat
	 * the grace period as having been done by someone else.
	 */
	smp_mb(); /* Caller's modifications seen first by other CPUs. */
	snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
	smp_mb(); /* Above access cannot bleed into critical section. */

	/*
	 * Block CPU-hotplug operations.  This means that any CPU-hotplug
	 * operation that finds an rcu_node structure with tasks in the
	 * process of being boosted will know that all tasks blocking
	 * this expedited grace period will already be in the process of
	 * being boosted.  This simplifies the process of moving tasks
	 * from leaf to root rcu_node structures.
	 */
	get_online_cpus();

	/*
	 * Acquire lock, falling back to synchronize_rcu() if too many
	 * lock-acquisition failures.  Of course, if someone does the
	 * expedited grace period for us, just leave.
	 */
	while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
		if (ULONG_CMP_LT(snap,
		    ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
			put_online_cpus();
			goto mb_ret; /* Others did our work for us. */
		}
		if (trycount++ < 10) {
			/* Back off; the delay grows with each failed attempt. */
			udelay(trycount * num_online_cpus());
		} else {
			/*
			 * Give up on expediting and wait for a normal grace
			 * period.  This path returns directly rather than
			 * going through mb_ret.
			 */
			put_online_cpus();
			wait_rcu_gp(call_rcu);
			return;
		}
	}
	/* Mutex held: recheck in case the work was done while we spun. */
	if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
		put_online_cpus();
		goto unlock_mb_ret; /* Others did our work for us. */
	}

	/* Force all RCU readers onto ->blkd_tasks lists. */
	synchronize_sched_expedited();

	/* Initialize ->expmask for all non-leaf rcu_node structures. */
	rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
		raw_spin_lock_irqsave(&rnp->lock, flags);
		rnp->expmask = rnp->qsmaskinit;
		raw_spin_unlock_irqrestore(&rnp->lock, flags);
	}

	/*
	 * Snapshot current state of ->blkd_tasks lists: every leaf first,
	 * then the root as well when the tree has more than one node.
	 */
	rcu_for_each_leaf_node(rsp, rnp)
		sync_rcu_preempt_exp_init(rsp, rnp);
	if (NUM_RCU_NODES > 1)
		sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));

	put_online_cpus();

	/* Wait for snapshotted ->blkd_tasks lists to drain. */
	rnp = rcu_get_root(rsp);
	wait_event(sync_rcu_preempt_exp_wq,
		   sync_rcu_preempt_exp_done(rnp));

	/* Clean up and exit. */
	smp_mb(); /* ensure expedited GP seen before counter increment. */
	ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
unlock_mb_ret:
	mutex_unlock(&sync_rcu_preempt_exp_mutex);
mb_ret:
	smp_mb(); /* ensure subsequent action seen after grace period. */
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
/**
 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
 *
 * Note that this primitive does not necessarily wait for an RCU grace period
 * to complete.  For example, if there are no RCU callbacks queued anywhere
 * in the system, then rcu_barrier() is within its rights to return
 * immediately, without waiting for anything, much less an RCU grace period.
 *
 * With preemptible RCU configured, this simply invokes _rcu_barrier() on
 * rcu_preempt_state.
 */
void rcu_barrier(void)
{
	_rcu_barrier(&rcu_preempt_state);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
/*
 * Initialize preemptible RCU's state structures by passing
 * rcu_preempt_state and rcu_preempt_data to rcu_init_one().
 */
static void __init __rcu_init_preempt(void)
{
	rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
}
  830. #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
/*
 * Without CONFIG_TREE_PREEMPT_RCU, the generic rcu_state pointer refers
 * to the RCU-sched state.
 */
static struct rcu_state *rcu_state = &rcu_sched_state;
/*
 * Tell them what RCU they are running: print the implementation banner
 * at KERN_INFO, then let rcu_bootup_announce_oddness() add its output.
 */
static void __init rcu_bootup_announce(void)
{
	printk(KERN_INFO "Hierarchical RCU implementation.\n");
	rcu_bootup_announce_oddness();
}
/*
 * Return the number of RCU batches processed thus far for debug & stats.
 * Without preemptible RCU this is whatever rcu_batches_completed_sched()
 * reports.
 */
long rcu_batches_completed(void)
{
	return rcu_batches_completed_sched();
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);
/*
 * Force a quiescent state for RCU, which, because there is no preemptible
 * RCU, becomes the same as rcu-sched: this simply calls
 * rcu_sched_force_quiescent_state().
 */
void rcu_force_quiescent_state(void)
{
	rcu_sched_force_quiescent_state();
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
/*
 * Because preemptible RCU does not exist, we never have to check for
 * CPUs being in quiescent states.  Empty stub; @cpu is ignored.
 */
static void rcu_preempt_note_context_switch(int cpu)
{
}
/*
 * Because preemptible RCU does not exist, there are never any preempted
 * RCU readers.  Always returns 0; @rnp is ignored.
 */
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
{
	return 0;
}
  872. #ifdef CONFIG_HOTPLUG_CPU
/*
 * Because preemptible RCU does not exist, no quieting of tasks.  The only
 * work done here is releasing rnp->lock and restoring irqs from @flags.
 */
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
{
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
  878. #endif /* #ifdef CONFIG_HOTPLUG_CPU */
/*
 * Because preemptible RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.  Empty stub;
 * @rsp is ignored.
 */
static void rcu_print_detail_task_stall(struct rcu_state *rsp)
{
}
/*
 * Because preemptible RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.  Always returns
 * 0; @rnp is ignored.
 */
static int rcu_print_task_stall(struct rcu_node *rnp)
{
	return 0;
}
/*
 * Because there is no preemptible RCU, there can be no readers blocked,
 * so there is no need to check for blocked tasks.  So check only for
 * bogus qsmask values: warn (once) if rnp->qsmask is non-zero.
 */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
	WARN_ON_ONCE(rnp->qsmask);
}
  903. #ifdef CONFIG_HOTPLUG_CPU
/*
 * Because preemptible RCU does not exist, it never needs to migrate
 * tasks that were blocked within RCU read-side critical sections, and
 * such non-existent tasks cannot possibly have been blocking the current
 * grace period.  Always returns 0; all arguments are ignored.
 */
static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
				     struct rcu_node *rnp,
				     struct rcu_data *rdp)
{
	return 0;
}
  916. #endif /* #ifdef CONFIG_HOTPLUG_CPU */
/*
 * Because preemptible RCU does not exist, it never has any callbacks
 * to check.  Empty stub; @cpu is ignored.
 */
static void rcu_preempt_check_callbacks(int cpu)
{
}
/*
 * Queue an RCU callback for lazy invocation after a grace period.
 * This will likely be later named something like "call_rcu_lazy()",
 * but that change will require some way of tagging the lazy RCU
 * callbacks in the list of pending callbacks.  Until then, this
 * function may only be called from __kfree_rcu().
 *
 * Because there is no preemptible RCU, we use RCU-sched instead: the
 * callback is queued via __call_rcu() on rcu_sched_state.
 *
 * @head: rcu_head used to queue the callback.
 * @func: function to be invoked on @head after the grace period.
 */
void kfree_call_rcu(struct rcu_head *head,
		    void (*func)(struct rcu_head *rcu))
{
	__call_rcu(head, func, &rcu_sched_state, -1, 1);
}
EXPORT_SYMBOL_GPL(kfree_call_rcu);
/*
 * Wait for an rcu-preempt grace period, but make it happen quickly.
 * But because preemptible RCU does not exist, map to rcu-sched by
 * calling synchronize_sched_expedited().
 */
void synchronize_rcu_expedited(void)
{
	synchronize_sched_expedited();
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
  948. #ifdef CONFIG_HOTPLUG_CPU
/*
 * Because preemptible RCU does not exist, there is never any need to
 * report on tasks preempted in RCU read-side critical sections during
 * expedited RCU grace periods.  Empty stub; all arguments are ignored.
 */
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
			       bool wake)
{
}
  958. #endif /* #ifdef CONFIG_HOTPLUG_CPU */
/*
 * Because preemptible RCU does not exist, rcu_barrier() is just
 * another name for rcu_barrier_sched(), which it calls directly.
 */
void rcu_barrier(void)
{
	rcu_barrier_sched();
}
EXPORT_SYMBOL_GPL(rcu_barrier);
/*
 * Because preemptible RCU does not exist, it need not be initialized.
 * Empty stub.
 */
static void __init __rcu_init_preempt(void)
{
}
  974. #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
  975. #ifdef CONFIG_RCU_BOOST
  976. #include "rtmutex_common.h"
  977. #ifdef CONFIG_RCU_TRACE
  978. static void rcu_initiate_boost_trace(struct rcu_node *rnp)
  979. {
  980. if (list_empty(&rnp->blkd_tasks))
  981. rnp->n_balk_blkd_tasks++;
  982. else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
  983. rnp->n_balk_exp_gp_tasks++;
  984. else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
  985. rnp->n_balk_boost_tasks++;
  986. else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
  987. rnp->n_balk_notblocked++;
  988. else if (rnp->gp_tasks != NULL &&
  989. ULONG_CMP_LT(jiffies, rnp->boost_time))
  990. rnp->n_balk_notyet++;
  991. else
  992. rnp->n_balk_nos++;
  993. }
  994. #else /* #ifdef CONFIG_RCU_TRACE */
/* Without CONFIG_RCU_TRACE, declined boost requests are not counted. */
static void rcu_initiate_boost_trace(struct rcu_node *rnp)
{
}
  998. #endif /* #else #ifdef CONFIG_RCU_TRACE */
  999. static void rcu_wake_cond(struct task_struct *t, int status)
  1000. {
  1001. /*
  1002. * If the thread is yielding, only wake it when this
  1003. * is invoked from idle
  1004. */
  1005. if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
  1006. wake_up_process(t);
  1007. }
  1008. /*
  1009. * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  1010. * or ->boost_tasks, advancing the pointer to the next task in the
  1011. * ->blkd_tasks list.
  1012. *
  1013. * Note that irqs must be enabled: boosting the task can block.
  1014. * Returns 1 if there are more tasks needing to be boosted.
  1015. */
  1016. static int rcu_boost(struct rcu_node *rnp)
  1017. {
  1018. unsigned long flags;
  1019. struct rt_mutex mtx;
  1020. struct task_struct *t;
  1021. struct list_head *tb;
  1022. if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
  1023. return 0; /* Nothing left to boost. */
  1024. raw_spin_lock_irqsave(&rnp->lock, flags);
  1025. /*
  1026. * Recheck under the lock: all tasks in need of boosting
  1027. * might exit their RCU read-side critical sections on their own.
  1028. */
  1029. if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
  1030. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  1031. return 0;
  1032. }
  1033. /*
  1034. * Preferentially boost tasks blocking expedited grace periods.
  1035. * This cannot starve the normal grace periods because a second
  1036. * expedited grace period must boost all blocked tasks, including
  1037. * those blocking the pre-existing normal grace period.
  1038. */
  1039. if (rnp->exp_tasks != NULL) {
  1040. tb = rnp->exp_tasks;
  1041. rnp->n_exp_boosts++;
  1042. } else {
  1043. tb = rnp->boost_tasks;
  1044. rnp->n_normal_boosts++;
  1045. }
  1046. rnp->n_tasks_boosted++;
  1047. /*
  1048. * We boost task t by manufacturing an rt_mutex that appears to
  1049. * be held by task t. We leave a pointer to that rt_mutex where
  1050. * task t can find it, and task t will release the mutex when it
  1051. * exits its outermost RCU read-side critical section. Then
  1052. * simply acquiring this artificial rt_mutex will boost task
  1053. * t's priority. (Thanks to tglx for suggesting this approach!)
  1054. *
  1055. * Note that task t must acquire rnp->lock to remove itself from
  1056. * the ->blkd_tasks list, which it will do from exit() if from
  1057. * nowhere else. We therefore are guaranteed that task t will
  1058. * stay around at least until we drop rnp->lock. Note that
  1059. * rnp->lock also resolves races between our priority boosting
  1060. * and task t's exiting its outermost RCU read-side critical
  1061. * section.
  1062. */
  1063. t = container_of(tb, struct task_struct, rcu_node_entry);
  1064. rt_mutex_init_proxy_locked(&mtx, t);
  1065. t->rcu_boost_mutex = &mtx;
  1066. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  1067. rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
  1068. rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
  1069. return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
  1070. ACCESS_ONCE(rnp->boost_tasks) != NULL;
  1071. }
  1072. /*
  1073. * Priority-boosting kthread. One per leaf rcu_node and one for the
  1074. * root rcu_node.
  1075. */
  1076. static int rcu_boost_kthread(void *arg)
  1077. {
  1078. struct rcu_node *rnp = (struct rcu_node *)arg;
  1079. int spincnt = 0;
  1080. int more2boost;
  1081. trace_rcu_utilization("Start boost kthread@init");
  1082. for (;;) {
  1083. rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
  1084. trace_rcu_utilization("End boost kthread@rcu_wait");
  1085. rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
  1086. trace_rcu_utilization("Start boost kthread@rcu_wait");
  1087. rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
  1088. more2boost = rcu_boost(rnp);
  1089. if (more2boost)
  1090. spincnt++;
  1091. else
  1092. spincnt = 0;
  1093. if (spincnt > 10) {
  1094. rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
  1095. trace_rcu_utilization("End boost kthread@rcu_yield");
  1096. schedule_timeout_interruptible(2);
  1097. trace_rcu_utilization("Start boost kthread@rcu_yield");
  1098. spincnt = 0;
  1099. }
  1100. }
  1101. /* NOTREACHED */
  1102. trace_rcu_utilization("End boost kthread@notreached");
  1103. return 0;
  1104. }
  1105. /*
  1106. * Check to see if it is time to start boosting RCU readers that are
  1107. * blocking the current grace period, and, if so, tell the per-rcu_node
  1108. * kthread to start boosting them. If there is an expedited grace
  1109. * period in progress, it is always time to boost.
  1110. *
  1111. * The caller must hold rnp->lock, which this function releases.
  1112. * The ->boost_kthread_task is immortal, so we don't need to worry
  1113. * about it going away.
  1114. */
  1115. static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
  1116. {
  1117. struct task_struct *t;
  1118. if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
  1119. rnp->n_balk_exp_gp_tasks++;
  1120. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  1121. return;
  1122. }
  1123. if (rnp->exp_tasks != NULL ||
  1124. (rnp->gp_tasks != NULL &&
  1125. rnp->boost_tasks == NULL &&
  1126. rnp->qsmask == 0 &&
  1127. ULONG_CMP_GE(jiffies, rnp->boost_time))) {
  1128. if (rnp->exp_tasks == NULL)
  1129. rnp->boost_tasks = rnp->gp_tasks;
  1130. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  1131. t = rnp->boost_kthread_task;
  1132. if (t)
  1133. rcu_wake_cond(t, rnp->boost_kthread_status);
  1134. } else {
  1135. rcu_initiate_boost_trace(rnp);
  1136. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  1137. }
  1138. }
  1139. /*
  1140. * Wake up the per-CPU kthread to invoke RCU callbacks.
  1141. */
  1142. static void invoke_rcu_callbacks_kthread(void)
  1143. {
  1144. unsigned long flags;
  1145. local_irq_save(flags);
  1146. __this_cpu_write(rcu_cpu_has_work, 1);
  1147. if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
  1148. current != __this_cpu_read(rcu_cpu_kthread_task)) {
  1149. rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
  1150. __this_cpu_read(rcu_cpu_kthread_status));
  1151. }
  1152. local_irq_restore(flags);
  1153. }
  1154. /*
  1155. * Is the current CPU running the RCU-callbacks kthread?
  1156. * Caller must have preemption disabled.
  1157. */
  1158. static bool rcu_is_callbacks_kthread(void)
  1159. {
  1160. return __get_cpu_var(rcu_cpu_kthread_task) == current;
  1161. }
/* CONFIG_RCU_BOOST_DELAY scaled by HZ/1000 into jiffies, rounded up. */
#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)

/*
 * Do priority-boost accounting for the start of a new grace period:
 * set ->boost_time to RCU_BOOST_DELAY_JIFFIES from now.  This is the
 * time that rcu_initiate_boost() compares against jiffies.
 */
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
	rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
}
  1170. /*
  1171. * Create an RCU-boost kthread for the specified node if one does not
  1172. * already exist. We only create this kthread for preemptible RCU.
  1173. * Returns zero if all is well, a negated errno otherwise.
  1174. */
  1175. static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
  1176. struct rcu_node *rnp)
  1177. {
  1178. int rnp_index = rnp - &rsp->node[0];
  1179. unsigned long flags;
  1180. struct sched_param sp;
  1181. struct task_struct *t;
  1182. if (&rcu_preempt_state != rsp)
  1183. return 0;
  1184. if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0)
  1185. return 0;
  1186. rsp->boost = 1;
  1187. if (rnp->boost_kthread_task != NULL)
  1188. return 0;
  1189. t = kthread_create(rcu_boost_kthread, (void *)rnp,
  1190. "rcub/%d", rnp_index);
  1191. if (IS_ERR(t))
  1192. return PTR_ERR(t);
  1193. raw_spin_lock_irqsave(&rnp->lock, flags);
  1194. rnp->boost_kthread_task = t;
  1195. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  1196. sp.sched_priority = RCU_BOOST_PRIO;
  1197. sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
  1198. wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
  1199. return 0;
  1200. }
/*
 * Work function of the per-CPU RCU kthread: call rcu_do_batch() for this
 * CPU's RCU-sched and RCU-bh data, then rcu_preempt_do_callbacks().
 */
static void rcu_kthread_do_work(void)
{
	rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
	rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
	rcu_preempt_do_callbacks();
}
  1207. static void rcu_cpu_kthread_setup(unsigned int cpu)
  1208. {
  1209. struct sched_param sp;
  1210. sp.sched_priority = RCU_KTHREAD_PRIO;
  1211. sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
  1212. }
/*
 * ->park callback of rcu_cpu_thread_spec: record RCU_KTHREAD_OFFCPU as
 * the kthread status of the specified CPU.
 */
static void rcu_cpu_kthread_park(unsigned int cpu)
{
	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
}
/*
 * ->thread_should_run callback of rcu_cpu_thread_spec: return the
 * executing CPU's rcu_cpu_has_work flag.  @cpu itself is not consulted.
 */
static int rcu_cpu_kthread_should_run(unsigned int cpu)
{
	return __get_cpu_var(rcu_cpu_has_work);
}
  1221. /*
  1222. * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
  1223. * RCU softirq used in flavors and configurations of RCU that do not
  1224. * support RCU priority boosting.
  1225. */
  1226. static void rcu_cpu_kthread(unsigned int cpu)
  1227. {
  1228. unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
  1229. char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
  1230. int spincnt;
  1231. for (spincnt = 0; spincnt < 10; spincnt++) {
  1232. trace_rcu_utilization("Start CPU kthread@rcu_wait");
  1233. local_bh_disable();
  1234. *statusp = RCU_KTHREAD_RUNNING;
  1235. this_cpu_inc(rcu_cpu_kthread_loops);
  1236. local_irq_disable();
  1237. work = *workp;
  1238. *workp = 0;
  1239. local_irq_enable();
  1240. if (work)
  1241. rcu_kthread_do_work();
  1242. local_bh_enable();
  1243. if (*workp == 0) {
  1244. trace_rcu_utilization("End CPU kthread@rcu_wait");
  1245. *statusp = RCU_KTHREAD_WAITING;
  1246. return;
  1247. }
  1248. }
  1249. *statusp = RCU_KTHREAD_YIELDING;
  1250. trace_rcu_utilization("Start CPU kthread@rcu_yield");
  1251. schedule_timeout_interruptible(2);
  1252. trace_rcu_utilization("End CPU kthread@rcu_yield");
  1253. *statusp = RCU_KTHREAD_WAITING;
  1254. }
  1255. /*
  1256. * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  1257. * served by the rcu_node in question. The CPU hotplug lock is still
  1258. * held, so the value of rnp->qsmaskinit will be stable.
  1259. *
  1260. * We don't include outgoingcpu in the affinity set, use -1 if there is
  1261. * no outgoing CPU. If there are no CPUs left in the affinity set,
  1262. * this function allows the kthread to execute on any CPU.
  1263. */
  1264. static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
  1265. {
  1266. struct task_struct *t = rnp->boost_kthread_task;
  1267. unsigned long mask = rnp->qsmaskinit;
  1268. cpumask_var_t cm;
  1269. int cpu;
  1270. if (!t)
  1271. return;
  1272. if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
  1273. return;
  1274. for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
  1275. if ((mask & 0x1) && cpu != outgoingcpu)
  1276. cpumask_set_cpu(cpu, cm);
  1277. if (cpumask_weight(cm) == 0) {
  1278. cpumask_setall(cm);
  1279. for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
  1280. cpumask_clear_cpu(cpu, cm);
  1281. WARN_ON_ONCE(cpumask_weight(cm) == 0);
  1282. }
  1283. set_cpus_allowed_ptr(t, cm);
  1284. free_cpumask_var(cm);
  1285. }
/*
 * Description of the per-CPU RCU callback kthreads, registered by
 * rcu_spawn_kthreads() via smpboot_register_percpu_thread().
 */
static struct smp_hotplug_thread rcu_cpu_thread_spec = {
	.store			= &rcu_cpu_kthread_task,
	.thread_should_run	= rcu_cpu_kthread_should_run,
	.thread_fn		= rcu_cpu_kthread,
	.thread_comm		= "rcuc/%u",
	.setup			= rcu_cpu_kthread_setup,
	.park			= rcu_cpu_kthread_park,
};
  1294. /*
  1295. * Spawn all kthreads -- called as soon as the scheduler is running.
  1296. */
  1297. static int __init rcu_spawn_kthreads(void)
  1298. {
  1299. struct rcu_node *rnp;
  1300. int cpu;
  1301. rcu_scheduler_fully_active = 1;
  1302. for_each_possible_cpu(cpu)
  1303. per_cpu(rcu_cpu_has_work, cpu) = 0;
  1304. BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
  1305. rnp = rcu_get_root(rcu_state);
  1306. (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
  1307. if (NUM_RCU_NODES > 1) {
  1308. rcu_for_each_leaf_node(rcu_state, rnp)
  1309. (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
  1310. }
  1311. return 0;
  1312. }
  1313. early_initcall(rcu_spawn_kthreads);
  1314. static void __cpuinit rcu_prepare_kthreads(int cpu)
  1315. {
  1316. struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
  1317. struct rcu_node *rnp = rdp->mynode;
  1318. /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
  1319. if (rcu_scheduler_fully_active)
  1320. (void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
  1321. }
  1322. #else /* #ifdef CONFIG_RCU_BOOST */
/*
 * Without CONFIG_RCU_BOOST there is nothing to boost, but callers still
 * enter holding rnp->lock and expect it to be released: do just that,
 * restoring irqs from @flags.
 */
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
{
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
}
/*
 * Without CONFIG_RCU_BOOST this stub only warns (once) if it is ever
 * called.
 */
static void invoke_rcu_callbacks_kthread(void)
{
	WARN_ON_ONCE(1);
}
/* Without CONFIG_RCU_BOOST this stub always returns false. */
static bool rcu_is_callbacks_kthread(void)
{
	return false;
}
/* Without CONFIG_RCU_BOOST there is no boost accounting: empty stub. */
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
}
/* Without CONFIG_RCU_BOOST there is no boost kthread: empty stub. */
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
}
/*
 * Early initcall used when CONFIG_RCU_BOOST is not set: there are no
 * kthreads to spawn, so just set rcu_scheduler_fully_active.  Always
 * returns 0.
 */
static int __init rcu_scheduler_really_started(void)
{
	rcu_scheduler_fully_active = 1;
	return 0;
}
early_initcall(rcu_scheduler_really_started);
/* Without CONFIG_RCU_BOOST there are no kthreads to prepare: empty stub. */
static void __cpuinit rcu_prepare_kthreads(int cpu)
{
}
  1350. #endif /* #else #ifdef CONFIG_RCU_BOOST */
  1351. #if !defined(CONFIG_RCU_FAST_NO_HZ)
/*
 * Check to see if any future RCU-related work will need to be done
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so.  This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 *
 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
 * any flavor of RCU.  *delta_jiffies is always set to ULONG_MAX, and the
 * return value is that of rcu_cpu_has_callbacks().
 */
int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
{
	*delta_jiffies = ULONG_MAX;
	return rcu_cpu_has_callbacks(cpu);
}
/*
 * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
 * Empty stub; @cpu is ignored.
 */
static void rcu_prepare_for_idle_init(int cpu)
{
}
/*
 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
 * after it.  Empty stub; @cpu is ignored.
 */
static void rcu_cleanup_after_idle(int cpu)
{
}
/*
 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
 * is nothing.  Empty stub; @cpu is ignored.
 */
static void rcu_prepare_for_idle(int cpu)
{
}
/*
 * Don't bother keeping a running count of the number of RCU callbacks
 * posted because CONFIG_RCU_FAST_NO_HZ=n.  Empty stub.
 */
static void rcu_idle_count_callbacks_posted(void)
{
}
  1393. #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
/*
 * This code is invoked when a CPU goes idle, at which point we want
 * to have the CPU do everything required for RCU so that it can enter
 * the energy-efficient dyntick-idle mode.  This is handled by a
 * state machine implemented by rcu_prepare_for_idle() below.
 *
 * The following four preprocessor symbols control this state machine:
 *
 * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt
 *	to satisfy RCU.  Beyond this point, it is better to incur a periodic
 *	scheduling-clock interrupt than to loop through the state machine
 *	at full power.
 * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are
 *	optional if RCU does not need anything immediately from this
 *	CPU, even if this CPU still has RCU callbacks queued.  The first
 *	times through the state machine are mandatory: we need to give
 *	the state machine a chance to communicate a quiescent state
 *	to the RCU core.
 * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
 *	to sleep in dyntick-idle mode with RCU callbacks pending.  This
 *	is sized to be roughly one RCU grace period.  Those energy-efficiency
 *	benchmarkers who might otherwise be tempted to set this to a large
 *	number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
 *	system.  And if you are -that- concerned about energy efficiency,
 *	just power the system down and be done with it!
 * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is
 *	permitted to sleep in dyntick-idle mode with only lazy RCU
 *	callbacks pending.  Setting this too high can OOM your system.
 *
 * The values below work well in practice.  If future workloads require
 * adjustment, they can be converted into kernel config parameters, though
 * making the state machine smarter might be a better option.
 */
#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */

/* Declared here rather than pulled in from a header; defined elsewhere. */
extern int tick_nohz_enabled;
  1432. /*
  1433. * Does the specified flavor of RCU have non-lazy callbacks pending on
  1434. * the specified CPU? Both RCU flavor and CPU are specified by the
  1435. * rcu_data structure.
  1436. */
  1437. static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp)
  1438. {
  1439. return rdp->qlen != rdp->qlen_lazy;
  1440. }
  1441. #ifdef CONFIG_TREE_PREEMPT_RCU
  1442. /*
  1443. * Are there non-lazy RCU-preempt callbacks? (There cannot be if there
  1444. * is no RCU-preempt in the kernel.)
  1445. */
  1446. static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
  1447. {
  1448. struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
  1449. return __rcu_cpu_has_nonlazy_callbacks(rdp);
  1450. }
  1451. #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
  1452. static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
  1453. {
  1454. return 0;
  1455. }
  1456. #endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */
  1457. /*
  1458. * Does any flavor of RCU have non-lazy callbacks on the specified CPU?
  1459. */
  1460. static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
  1461. {
  1462. return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) ||
  1463. __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) ||
  1464. rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
  1465. }
  1466. /*
  1467. * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
  1468. * callbacks on this CPU, (2) this CPU has not yet attempted to enter
  1469. * dyntick-idle mode, or (3) this CPU is in the process of attempting to
  1470. * enter dyntick-idle mode. Otherwise, if we have recently tried and failed
  1471. * to enter dyntick-idle mode, we refuse to try to enter it. After all,
  1472. * it is better to incur scheduling-clock interrupts than to spin
  1473. * continuously for the same time duration!
  1474. *
  1475. * The delta_jiffies argument is used to store the time when RCU is
  1476. * going to need the CPU again if it still has callbacks. The reason
  1477. * for this is that rcu_prepare_for_idle() might need to post a timer,
  1478. * but if so, it will do so after tick_nohz_stop_sched_tick() has set
  1479. * the wakeup time for this CPU. This means that RCU's timer can be
  1480. * delayed until the wakeup time, which defeats the purpose of posting
  1481. * a timer.
  1482. */
  1483. int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
  1484. {
  1485. struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
  1486. /* Flag a new idle sojourn to the idle-entry state machine. */
  1487. rdtp->idle_first_pass = 1;
  1488. /* If no callbacks, RCU doesn't need the CPU. */
  1489. if (!rcu_cpu_has_callbacks(cpu)) {
  1490. *delta_jiffies = ULONG_MAX;
  1491. return 0;
  1492. }
  1493. if (rdtp->dyntick_holdoff == jiffies) {
  1494. /* RCU recently tried and failed, so don't try again. */
  1495. *delta_jiffies = 1;
  1496. return 1;
  1497. }
  1498. /* Set up for the possibility that RCU will post a timer. */
  1499. if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
  1500. *delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies,
  1501. RCU_IDLE_GP_DELAY) - jiffies;
  1502. } else {
  1503. *delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY;
  1504. *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
  1505. }
  1506. return 0;
  1507. }
  1508. /*
  1509. * Handler for smp_call_function_single(). The only point of this
  1510. * handler is to wake the CPU up, so the handler does only tracing.
  1511. */
  1512. void rcu_idle_demigrate(void *unused)
  1513. {
  1514. trace_rcu_prep_idle("Demigrate");
  1515. }
  1516. /*
  1517. * Timer handler used to force CPU to start pushing its remaining RCU
  1518. * callbacks in the case where it entered dyntick-idle mode with callbacks
  1519. * pending. The hander doesn't really need to do anything because the
  1520. * real work is done upon re-entry to idle, or by the next scheduling-clock
  1521. * interrupt should idle not be re-entered.
  1522. *
  1523. * One special case: the timer gets migrated without awakening the CPU
  1524. * on which the timer was scheduled on. In this case, we must wake up
  1525. * that CPU. We do so with smp_call_function_single().
  1526. */
  1527. static void rcu_idle_gp_timer_func(unsigned long cpu_in)
  1528. {
  1529. int cpu = (int)cpu_in;
  1530. trace_rcu_prep_idle("Timer");
  1531. if (cpu != smp_processor_id())
  1532. smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0);
  1533. else
  1534. WARN_ON_ONCE(1); /* Getting here can hang the system... */
  1535. }
  1536. /*
  1537. * Initialize the timer used to pull CPUs out of dyntick-idle mode.
  1538. */
  1539. static void rcu_prepare_for_idle_init(int cpu)
  1540. {
  1541. struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
  1542. rdtp->dyntick_holdoff = jiffies - 1;
  1543. setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu);
  1544. rdtp->idle_gp_timer_expires = jiffies - 1;
  1545. rdtp->idle_first_pass = 1;
  1546. }
  1547. /*
  1548. * Clean up for exit from idle. Because we are exiting from idle, there
  1549. * is no longer any point to ->idle_gp_timer, so cancel it. This will
  1550. * do nothing if this timer is not active, so just cancel it unconditionally.
  1551. */
  1552. static void rcu_cleanup_after_idle(int cpu)
  1553. {
  1554. struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
  1555. del_timer(&rdtp->idle_gp_timer);
  1556. trace_rcu_prep_idle("Cleanup after idle");
  1557. rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
  1558. }
  1559. /*
  1560. * Check to see if any RCU-related work can be done by the current CPU,
  1561. * and if so, schedule a softirq to get it done. This function is part
  1562. * of the RCU implementation; it is -not- an exported member of the RCU API.
  1563. *
  1564. * The idea is for the current CPU to clear out all work required by the
  1565. * RCU core for the current grace period, so that this CPU can be permitted
  1566. * to enter dyntick-idle mode. In some cases, it will need to be awakened
  1567. * at the end of the grace period by whatever CPU ends the grace period.
  1568. * This allows CPUs to go dyntick-idle more quickly, and to reduce the
  1569. * number of wakeups by a modest integer factor.
  1570. *
  1571. * Because it is not legal to invoke rcu_process_callbacks() with irqs
  1572. * disabled, we do one pass of force_quiescent_state(), then do a
  1573. * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
  1574. * later. The ->dyntick_drain field controls the sequencing.
  1575. *
  1576. * The caller must have disabled interrupts.
  1577. */
  1578. static void rcu_prepare_for_idle(int cpu)
  1579. {
  1580. struct timer_list *tp;
  1581. struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
  1582. int tne;
  1583. /* Handle nohz enablement switches conservatively. */
  1584. tne = ACCESS_ONCE(tick_nohz_enabled);
  1585. if (tne != rdtp->tick_nohz_enabled_snap) {
  1586. if (rcu_cpu_has_callbacks(cpu))
  1587. invoke_rcu_core(); /* force nohz to see update. */
  1588. rdtp->tick_nohz_enabled_snap = tne;
  1589. return;
  1590. }
  1591. if (!tne)
  1592. return;
  1593. /* Adaptive-tick mode, where usermode execution is idle to RCU. */
  1594. if (!is_idle_task(current)) {
  1595. rdtp->dyntick_holdoff = jiffies - 1;
  1596. if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
  1597. trace_rcu_prep_idle("User dyntick with callbacks");
  1598. rdtp->idle_gp_timer_expires =
  1599. round_up(jiffies + RCU_IDLE_GP_DELAY,
  1600. RCU_IDLE_GP_DELAY);
  1601. } else if (rcu_cpu_has_callbacks(cpu)) {
  1602. rdtp->idle_gp_timer_expires =
  1603. round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
  1604. trace_rcu_prep_idle("User dyntick with lazy callbacks");
  1605. } else {
  1606. return;
  1607. }
  1608. tp = &rdtp->idle_gp_timer;
  1609. mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
  1610. return;
  1611. }
  1612. /*
  1613. * If this is an idle re-entry, for example, due to use of
  1614. * RCU_NONIDLE() or the new idle-loop tracing API within the idle
  1615. * loop, then don't take any state-machine actions, unless the
  1616. * momentary exit from idle queued additional non-lazy callbacks.
  1617. * Instead, repost the ->idle_gp_timer if this CPU has callbacks
  1618. * pending.
  1619. */
  1620. if (!rdtp->idle_first_pass &&
  1621. (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) {
  1622. if (rcu_cpu_has_callbacks(cpu)) {
  1623. tp = &rdtp->idle_gp_timer;
  1624. mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
  1625. }
  1626. return;
  1627. }
  1628. rdtp->idle_first_pass = 0;
  1629. rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1;
  1630. /*
  1631. * If there are no callbacks on this CPU, enter dyntick-idle mode.
  1632. * Also reset state to avoid prejudicing later attempts.
  1633. */
  1634. if (!rcu_cpu_has_callbacks(cpu)) {
  1635. rdtp->dyntick_holdoff = jiffies - 1;
  1636. rdtp->dyntick_drain = 0;
  1637. trace_rcu_prep_idle("No callbacks");
  1638. return;
  1639. }
  1640. /*
  1641. * If in holdoff mode, just return. We will presumably have
  1642. * refrained from disabling the scheduling-clock tick.
  1643. */
  1644. if (rdtp->dyntick_holdoff == jiffies) {
  1645. trace_rcu_prep_idle("In holdoff");
  1646. return;
  1647. }
  1648. /* Check and update the ->dyntick_drain sequencing. */
  1649. if (rdtp->dyntick_drain <= 0) {
  1650. /* First time through, initialize the counter. */
  1651. rdtp->dyntick_drain = RCU_IDLE_FLUSHES;
  1652. } else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
  1653. !rcu_pending(cpu) &&
  1654. !local_softirq_pending()) {
  1655. /* Can we go dyntick-idle despite still having callbacks? */
  1656. rdtp->dyntick_drain = 0;
  1657. rdtp->dyntick_holdoff = jiffies;
  1658. if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
  1659. trace_rcu_prep_idle("Dyntick with callbacks");
  1660. rdtp->idle_gp_timer_expires =
  1661. round_up(jiffies + RCU_IDLE_GP_DELAY,
  1662. RCU_IDLE_GP_DELAY);
  1663. } else {
  1664. rdtp->idle_gp_timer_expires =
  1665. round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
  1666. trace_rcu_prep_idle("Dyntick with lazy callbacks");
  1667. }
  1668. tp = &rdtp->idle_gp_timer;
  1669. mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
  1670. rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
  1671. return; /* Nothing more to do immediately. */
  1672. } else if (--(rdtp->dyntick_drain) <= 0) {
  1673. /* We have hit the limit, so time to give up. */
  1674. rdtp->dyntick_holdoff = jiffies;
  1675. trace_rcu_prep_idle("Begin holdoff");
  1676. invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
  1677. return;
  1678. }
  1679. /*
  1680. * Do one step of pushing the remaining RCU callbacks through
  1681. * the RCU core state machine.
  1682. */
  1683. #ifdef CONFIG_TREE_PREEMPT_RCU
  1684. if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
  1685. rcu_preempt_qs(cpu);
  1686. force_quiescent_state(&rcu_preempt_state);
  1687. }
  1688. #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
  1689. if (per_cpu(rcu_sched_data, cpu).nxtlist) {
  1690. rcu_sched_qs(cpu);
  1691. force_quiescent_state(&rcu_sched_state);
  1692. }
  1693. if (per_cpu(rcu_bh_data, cpu).nxtlist) {
  1694. rcu_bh_qs(cpu);
  1695. force_quiescent_state(&rcu_bh_state);
  1696. }
  1697. /*
  1698. * If RCU callbacks are still pending, RCU still needs this CPU.
  1699. * So try forcing the callbacks through the grace period.
  1700. */
  1701. if (rcu_cpu_has_callbacks(cpu)) {
  1702. trace_rcu_prep_idle("More callbacks");
  1703. invoke_rcu_core();
  1704. } else {
  1705. trace_rcu_prep_idle("Callbacks drained");
  1706. }
  1707. }
  1708. /*
  1709. * Keep a running count of the number of non-lazy callbacks posted
  1710. * on this CPU. This running counter (which is never decremented) allows
  1711. * rcu_prepare_for_idle() to detect when something out of the idle loop
  1712. * posts a callback, even if an equal number of callbacks are invoked.
  1713. * Of course, callbacks should only be posted from within a trace event
  1714. * designed to be called from idle or from within RCU_NONIDLE().
  1715. */
  1716. static void rcu_idle_count_callbacks_posted(void)
  1717. {
  1718. __this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
  1719. }
  1720. /*
  1721. * Data for flushing lazy RCU callbacks at OOM time.
  1722. */
  1723. static atomic_t oom_callback_count;
  1724. static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
  1725. /*
  1726. * RCU OOM callback -- decrement the outstanding count and deliver the
  1727. * wake-up if we are the last one.
  1728. */
  1729. static void rcu_oom_callback(struct rcu_head *rhp)
  1730. {
  1731. if (atomic_dec_and_test(&oom_callback_count))
  1732. wake_up(&oom_callback_wq);
  1733. }
  1734. /*
  1735. * Post an rcu_oom_notify callback on the current CPU if it has at
  1736. * least one lazy callback. This will unnecessarily post callbacks
  1737. * to CPUs that already have a non-lazy callback at the end of their
  1738. * callback list, but this is an infrequent operation, so accept some
  1739. * extra overhead to keep things simple.
  1740. */
  1741. static void rcu_oom_notify_cpu(void *unused)
  1742. {
  1743. struct rcu_state *rsp;
  1744. struct rcu_data *rdp;
  1745. for_each_rcu_flavor(rsp) {
  1746. rdp = __this_cpu_ptr(rsp->rda);
  1747. if (rdp->qlen_lazy != 0) {
  1748. atomic_inc(&oom_callback_count);
  1749. rsp->call(&rdp->oom_head, rcu_oom_callback);
  1750. }
  1751. }
  1752. }
  1753. /*
  1754. * If low on memory, ensure that each CPU has a non-lazy callback.
  1755. * This will wake up CPUs that have only lazy callbacks, in turn
  1756. * ensuring that they free up the corresponding memory in a timely manner.
  1757. * Because an uncertain amount of memory will be freed in some uncertain
  1758. * timeframe, we do not claim to have freed anything.
  1759. */
  1760. static int rcu_oom_notify(struct notifier_block *self,
  1761. unsigned long notused, void *nfreed)
  1762. {
  1763. int cpu;
  1764. /* Wait for callbacks from earlier instance to complete. */
  1765. wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
  1766. /*
  1767. * Prevent premature wakeup: ensure that all increments happen
  1768. * before there is a chance of the counter reaching zero.
  1769. */
  1770. atomic_set(&oom_callback_count, 1);
  1771. get_online_cpus();
  1772. for_each_online_cpu(cpu) {
  1773. smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
  1774. cond_resched();
  1775. }
  1776. put_online_cpus();
  1777. /* Unconditionally decrement: no need to wake ourselves up. */
  1778. atomic_dec(&oom_callback_count);
  1779. return NOTIFY_OK;
  1780. }
  1781. static struct notifier_block rcu_oom_nb = {
  1782. .notifier_call = rcu_oom_notify
  1783. };
  1784. static int __init rcu_register_oom_notifier(void)
  1785. {
  1786. register_oom_notifier(&rcu_oom_nb);
  1787. return 0;
  1788. }
  1789. early_initcall(rcu_register_oom_notifier);
  1790. #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
  1791. #ifdef CONFIG_RCU_CPU_STALL_INFO
  1792. #ifdef CONFIG_RCU_FAST_NO_HZ
  1793. static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
  1794. {
  1795. struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
  1796. struct timer_list *tltp = &rdtp->idle_gp_timer;
  1797. char c;
  1798. c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.';
  1799. if (timer_pending(tltp))
  1800. sprintf(cp, "drain=%d %c timer=%lu",
  1801. rdtp->dyntick_drain, c, tltp->expires - jiffies);
  1802. else
  1803. sprintf(cp, "drain=%d %c timer not pending",
  1804. rdtp->dyntick_drain, c);
  1805. }
  1806. #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
  1807. static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
  1808. {
  1809. *cp = '\0';
  1810. }
  1811. #endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
  1812. /* Initiate the stall-info list. */
  1813. static void print_cpu_stall_info_begin(void)
  1814. {
  1815. printk(KERN_CONT "\n");
  1816. }
  1817. /*
  1818. * Print out diagnostic information for the specified stalled CPU.
  1819. *
  1820. * If the specified CPU is aware of the current RCU grace period
  1821. * (flavor specified by rsp), then print the number of scheduling
  1822. * clock interrupts the CPU has taken during the time that it has
  1823. * been aware. Otherwise, print the number of RCU grace periods
  1824. * that this CPU is ignorant of, for example, "1" if the CPU was
  1825. * aware of the previous grace period.
  1826. *
  1827. * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
  1828. */
  1829. static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
  1830. {
  1831. char fast_no_hz[72];
  1832. struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
  1833. struct rcu_dynticks *rdtp = rdp->dynticks;
  1834. char *ticks_title;
  1835. unsigned long ticks_value;
  1836. if (rsp->gpnum == rdp->gpnum) {
  1837. ticks_title = "ticks this GP";
  1838. ticks_value = rdp->ticks_this_gp;
  1839. } else {
  1840. ticks_title = "GPs behind";
  1841. ticks_value = rsp->gpnum - rdp->gpnum;
  1842. }
  1843. print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
  1844. printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n",
  1845. cpu, ticks_value, ticks_title,
  1846. atomic_read(&rdtp->dynticks) & 0xfff,
  1847. rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
  1848. fast_no_hz);
  1849. }
  1850. /* Terminate the stall-info list. */
  1851. static void print_cpu_stall_info_end(void)
  1852. {
  1853. printk(KERN_ERR "\t");
  1854. }
  1855. /* Zero ->ticks_this_gp for all flavors of RCU. */
  1856. static void zero_cpu_stall_ticks(struct rcu_data *rdp)
  1857. {
  1858. rdp->ticks_this_gp = 0;
  1859. }
  1860. /* Increment ->ticks_this_gp for all flavors of RCU. */
  1861. static void increment_cpu_stall_ticks(void)
  1862. {
  1863. struct rcu_state *rsp;
  1864. for_each_rcu_flavor(rsp)
  1865. __this_cpu_ptr(rsp->rda)->ticks_this_gp++;
  1866. }
  1867. #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
  1868. static void print_cpu_stall_info_begin(void)
  1869. {
  1870. printk(KERN_CONT " {");
  1871. }
  1872. static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
  1873. {
  1874. printk(KERN_CONT " %d", cpu);
  1875. }
  1876. static void print_cpu_stall_info_end(void)
  1877. {
  1878. printk(KERN_CONT "} ");
  1879. }
  1880. static void zero_cpu_stall_ticks(struct rcu_data *rdp)
  1881. {
  1882. }
  1883. static void increment_cpu_stall_ticks(void)
  1884. {
  1885. }
  1886. #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
  1887. #ifdef CONFIG_RCU_NOCB_CPU
  1888. /*
  1889. * Offload callback processing from the boot-time-specified set of CPUs
  1890. * specified by rcu_nocb_mask. For each CPU in the set, there is a
  1891. * kthread created that pulls the callbacks from the corresponding CPU,
  1892. * waits for a grace period to elapse, and invokes the callbacks.
  1893. * The no-CBs CPUs do a wake_up() on their kthread when they insert
  1894. * a callback into any empty list, unless the rcu_nocb_poll boot parameter
  1895. * has been specified, in which case each kthread actively polls its
  1896. * CPU. (Which isn't so great for energy efficiency, but which does
  1897. * reduce RCU's overhead on that CPU.)
  1898. *
  1899. * This is intended to be used in conjunction with Frederic Weisbecker's
  1900. * adaptive-idle work, which would seriously reduce OS jitter on CPUs
  1901. * running CPU-bound user-mode computations.
  1902. *
  1903. * Offloading of callback processing could also in theory be used as
  1904. * an energy-efficiency measure because CPUs with no RCU callbacks
  1905. * queued are more aggressive about entering dyntick-idle mode.
  1906. */
  1907. /* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */
  1908. static int __init rcu_nocb_setup(char *str)
  1909. {
  1910. alloc_bootmem_cpumask_var(&rcu_nocb_mask);
  1911. have_rcu_nocb_mask = true;
  1912. cpulist_parse(str, rcu_nocb_mask);
  1913. return 1;
  1914. }
  1915. __setup("rcu_nocbs=", rcu_nocb_setup);
  1916. static int __init parse_rcu_nocb_poll(char *arg)
  1917. {
  1918. rcu_nocb_poll = 1;
  1919. return 0;
  1920. }
  1921. early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
  1922. /*
  1923. * Does this CPU needs a grace period due to offloaded callbacks?
  1924. */
  1925. static int rcu_nocb_needs_gp(struct rcu_data *rdp)
  1926. {
  1927. return rdp->nocb_needs_gp;
  1928. }
  1929. /* Is the specified CPU a no-CPUs CPU? */
  1930. static bool is_nocb_cpu(int cpu)
  1931. {
  1932. if (have_rcu_nocb_mask)
  1933. return cpumask_test_cpu(cpu, rcu_nocb_mask);
  1934. return false;
  1935. }
  1936. /*
  1937. * Enqueue the specified string of rcu_head structures onto the specified
  1938. * CPU's no-CBs lists. The CPU is specified by rdp, the head of the
  1939. * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy
  1940. * counts are supplied by rhcount and rhcount_lazy.
  1941. *
  1942. * If warranted, also wake up the kthread servicing this CPUs queues.
  1943. */
  1944. static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
  1945. struct rcu_head *rhp,
  1946. struct rcu_head **rhtp,
  1947. int rhcount, int rhcount_lazy)
  1948. {
  1949. int len;
  1950. struct rcu_head **old_rhpp;
  1951. struct task_struct *t;
  1952. /* Enqueue the callback on the nocb list and update counts. */
  1953. old_rhpp = xchg(&rdp->nocb_tail, rhtp);
  1954. ACCESS_ONCE(*old_rhpp) = rhp;
  1955. atomic_long_add(rhcount, &rdp->nocb_q_count);
  1956. atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
  1957. /* If we are not being polled and there is a kthread, awaken it ... */
  1958. t = ACCESS_ONCE(rdp->nocb_kthread);
  1959. if (rcu_nocb_poll | !t)
  1960. return;
  1961. len = atomic_long_read(&rdp->nocb_q_count);
  1962. if (old_rhpp == &rdp->nocb_head) {
  1963. wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
  1964. rdp->qlen_last_fqs_check = 0;
  1965. } else if (len > rdp->qlen_last_fqs_check + qhimark) {
  1966. wake_up_process(t); /* ... or if many callbacks queued. */
  1967. rdp->qlen_last_fqs_check = LONG_MAX / 2;
  1968. }
  1969. return;
  1970. }
  1971. /*
  1972. * This is a helper for __call_rcu(), which invokes this when the normal
  1973. * callback queue is inoperable. If this is not a no-CBs CPU, this
  1974. * function returns failure back to __call_rcu(), which can complain
  1975. * appropriately.
  1976. *
  1977. * Otherwise, this function queues the callback where the corresponding
  1978. * "rcuo" kthread can find it.
  1979. */
  1980. static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
  1981. bool lazy)
  1982. {
  1983. if (!is_nocb_cpu(rdp->cpu))
  1984. return 0;
  1985. __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
  1986. return 1;
  1987. }
  1988. /*
  1989. * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
  1990. * not a no-CBs CPU.
  1991. */
  1992. static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
  1993. struct rcu_data *rdp)
  1994. {
  1995. long ql = rsp->qlen;
  1996. long qll = rsp->qlen_lazy;
  1997. /* If this is not a no-CBs CPU, tell the caller to do it the old way. */
  1998. if (!is_nocb_cpu(smp_processor_id()))
  1999. return 0;
  2000. rsp->qlen = 0;
  2001. rsp->qlen_lazy = 0;
  2002. /* First, enqueue the donelist, if any. This preserves CB ordering. */
  2003. if (rsp->orphan_donelist != NULL) {
  2004. __call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
  2005. rsp->orphan_donetail, ql, qll);
  2006. ql = qll = 0;
  2007. rsp->orphan_donelist = NULL;
  2008. rsp->orphan_donetail = &rsp->orphan_donelist;
  2009. }
  2010. if (rsp->orphan_nxtlist != NULL) {
  2011. __call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
  2012. rsp->orphan_nxttail, ql, qll);
  2013. ql = qll = 0;
  2014. rsp->orphan_nxtlist = NULL;
  2015. rsp->orphan_nxttail = &rsp->orphan_nxtlist;
  2016. }
  2017. return 1;
  2018. }
  2019. /*
  2020. * If necessary, kick off a new grace period, and either way wait
  2021. * for a subsequent grace period to complete.
  2022. */
  2023. static void rcu_nocb_wait_gp(struct rcu_data *rdp)
  2024. {
  2025. unsigned long c;
  2026. unsigned long flags;
  2027. unsigned long j;
  2028. struct rcu_node *rnp = rdp->mynode;
  2029. raw_spin_lock_irqsave(&rnp->lock, flags);
  2030. c = rnp->completed + 2;
  2031. rdp->nocb_needs_gp = true;
  2032. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  2033. /*
  2034. * Wait for the grace period. Do so interruptibly to avoid messing
  2035. * up the load average.
  2036. */
  2037. for (;;) {
  2038. j = jiffies;
  2039. schedule_timeout_interruptible(2);
  2040. raw_spin_lock_irqsave(&rnp->lock, flags);
  2041. if (ULONG_CMP_GE(rnp->completed, c)) {
  2042. rdp->nocb_needs_gp = false;
  2043. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  2044. break;
  2045. }
  2046. if (j == jiffies)
  2047. flush_signals(current);
  2048. raw_spin_unlock_irqrestore(&rnp->lock, flags);
  2049. }
  2050. smp_mb(); /* Ensure that CB invocation happens after GP end. */
  2051. }
  2052. /*
  2053. * Per-rcu_data kthread, but only for no-CBs CPUs. Each kthread invokes
  2054. * callbacks queued by the corresponding no-CBs CPU.
  2055. */
  2056. static int rcu_nocb_kthread(void *arg)
  2057. {
  2058. int c, cl;
  2059. struct rcu_head *list;
  2060. struct rcu_head *next;
  2061. struct rcu_head **tail;
  2062. struct rcu_data *rdp = arg;
  2063. /* Each pass through this loop invokes one batch of callbacks */
  2064. for (;;) {
  2065. /* If not polling, wait for next batch of callbacks. */
  2066. if (!rcu_nocb_poll)
  2067. wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
  2068. list = ACCESS_ONCE(rdp->nocb_head);
  2069. if (!list) {
  2070. schedule_timeout_interruptible(1);
  2071. flush_signals(current);
  2072. continue;
  2073. }
  2074. /*
  2075. * Extract queued callbacks, update counts, and wait
  2076. * for a grace period to elapse.
  2077. */
  2078. ACCESS_ONCE(rdp->nocb_head) = NULL;
  2079. tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
  2080. c = atomic_long_xchg(&rdp->nocb_q_count, 0);
  2081. cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
  2082. ACCESS_ONCE(rdp->nocb_p_count) += c;
  2083. ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
  2084. rcu_nocb_wait_gp(rdp);
  2085. /* Each pass through the following loop invokes a callback. */
  2086. trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
  2087. c = cl = 0;
  2088. while (list) {
  2089. next = list->next;
  2090. /* Wait for enqueuing to complete, if needed. */
  2091. while (next == NULL && &list->next != tail) {
  2092. schedule_timeout_interruptible(1);
  2093. next = list->next;
  2094. }
  2095. debug_rcu_head_unqueue(list);
  2096. local_bh_disable();
  2097. if (__rcu_reclaim(rdp->rsp->name, list))
  2098. cl++;
  2099. c++;
  2100. local_bh_enable();
  2101. list = next;
  2102. }
  2103. trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
  2104. ACCESS_ONCE(rdp->nocb_p_count) -= c;
  2105. ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl;
  2106. rdp->n_nocbs_invoked += c;
  2107. }
  2108. return 0;
  2109. }
  2110. /* Initialize per-rcu_data variables for no-CBs CPUs. */
  2111. static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
  2112. {
  2113. rdp->nocb_tail = &rdp->nocb_head;
  2114. init_waitqueue_head(&rdp->nocb_wq);
  2115. }
  2116. /* Create a kthread for each RCU flavor for each no-CBs CPU. */
  2117. static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
  2118. {
  2119. int cpu;
  2120. struct rcu_data *rdp;
  2121. struct task_struct *t;
  2122. if (rcu_nocb_mask == NULL)
  2123. return;
  2124. for_each_cpu(cpu, rcu_nocb_mask) {
  2125. rdp = per_cpu_ptr(rsp->rda, cpu);
  2126. t = kthread_run(rcu_nocb_kthread, rdp, "rcuo%d", cpu);
  2127. BUG_ON(IS_ERR(t));
  2128. ACCESS_ONCE(rdp->nocb_kthread) = t;
  2129. }
  2130. }
  2131. /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
  2132. static bool init_nocb_callback_list(struct rcu_data *rdp)
  2133. {
  2134. if (rcu_nocb_mask == NULL ||
  2135. !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
  2136. return false;
  2137. rdp->nxttail[RCU_NEXT_TAIL] = NULL;
  2138. return true;
  2139. }
  2140. #else /* #ifdef CONFIG_RCU_NOCB_CPU */
  2141. static int rcu_nocb_needs_gp(struct rcu_data *rdp)
  2142. {
  2143. return 0;
  2144. }
  2145. static bool is_nocb_cpu(int cpu)
  2146. {
  2147. return false;
  2148. }
  2149. static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
  2150. bool lazy)
  2151. {
  2152. return 0;
  2153. }
  2154. static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
  2155. struct rcu_data *rdp)
  2156. {
  2157. return 0;
  2158. }
  2159. static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
  2160. {
  2161. }
  2162. static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
  2163. {
  2164. }
  2165. static bool init_nocb_callback_list(struct rcu_data *rdp)
  2166. {
  2167. return false;
  2168. }
  2169. #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */