xpc_main.c

/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
 */

/*
 * Cross Partition Communication (XPC) support - standard version.
 *
 *      XPC provides a message passing capability that crosses partition
 *      boundaries. This module is made up of two parts:
 *
 *          partition   This part detects the presence/absence of other
 *                      partitions. It provides a heartbeat and monitors
 *                      the heartbeats of other partitions.
 *
 *          channel     This part manages the channels and sends/receives
 *                      messages across them to/from other partitions.
 *
 *      There are a couple of additional functions residing in XP, which
 *      provide an interface to XPC for its users.
 *
 *
 *      Caveats:
 *
 *        . Currently on sn2, we have no way to determine which nasid an IRQ
 *          came from. Thus, xpc_send_IRQ_sn2() does a remote amo write
 *          followed by an IPI. The amo indicates where data is to be pulled
 *          from, so after the IPI arrives, the remote partition checks the amo
 *          word. The IPI can actually arrive before the amo however, so other
 *          code must periodically check for this case. Also, remote amo
 *          operations do not reliably time out. Thus we do a remote PIO read
 *          solely to know whether the remote partition is down and whether we
 *          should stop sending IPIs to it. This remote PIO read operation is
 *          set up in a special nofault region so SAL knows to ignore (and
 *          clean up) any errors due to the remote amo write, PIO read, and/or
 *          PIO write operations.
 *
 *          If/when new hardware solves this IPI problem, we should abandon
 *          the current approach.
 *
 */
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/kdebug.h>
#include <linux/kthread.h>
#include "xpc.h"
/* define two XPC debug device structures to be used with dev_dbg() et al */

struct device_driver xpc_dbg_name = {
        .name = "xpc"
};

struct device xpc_part_dbg_subname = {
        .bus_id = {0},          /* set to "part" at xpc_init() time */
        .driver = &xpc_dbg_name
};

struct device xpc_chan_dbg_subname = {
        .bus_id = {0},          /* set to "chan" at xpc_init() time */
        .driver = &xpc_dbg_name
};

struct device *xpc_part = &xpc_part_dbg_subname;
struct device *xpc_chan = &xpc_chan_dbg_subname;

static int xpc_kdebug_ignore;
/* systune related variables for /proc/sys directories */

static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
static int xpc_hb_min_interval = 1;
static int xpc_hb_max_interval = 10;

static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
static int xpc_hb_check_min_interval = 10;
static int xpc_hb_check_max_interval = 120;

int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT;
static int xpc_disengage_min_timelimit;        /* = 0 */
static int xpc_disengage_max_timelimit = 120;
static ctl_table xpc_sys_xpc_hb_dir[] = {
        {
         .ctl_name = CTL_UNNUMBERED,
         .procname = "hb_interval",
         .data = &xpc_hb_interval,
         .maxlen = sizeof(int),
         .mode = 0644,
         .proc_handler = &proc_dointvec_minmax,
         .strategy = &sysctl_intvec,
         .extra1 = &xpc_hb_min_interval,
         .extra2 = &xpc_hb_max_interval},
        {
         .ctl_name = CTL_UNNUMBERED,
         .procname = "hb_check_interval",
         .data = &xpc_hb_check_interval,
         .maxlen = sizeof(int),
         .mode = 0644,
         .proc_handler = &proc_dointvec_minmax,
         .strategy = &sysctl_intvec,
         .extra1 = &xpc_hb_check_min_interval,
         .extra2 = &xpc_hb_check_max_interval},
        {}
};

static ctl_table xpc_sys_xpc_dir[] = {
        {
         .ctl_name = CTL_UNNUMBERED,
         .procname = "hb",
         .mode = 0555,
         .child = xpc_sys_xpc_hb_dir},
        {
         .ctl_name = CTL_UNNUMBERED,
         .procname = "disengage_timelimit",
         .data = &xpc_disengage_timelimit,
         .maxlen = sizeof(int),
         .mode = 0644,
         .proc_handler = &proc_dointvec_minmax,
         .strategy = &sysctl_intvec,
         .extra1 = &xpc_disengage_min_timelimit,
         .extra2 = &xpc_disengage_max_timelimit},
        {}
};

static ctl_table xpc_sys_dir[] = {
        {
         .ctl_name = CTL_UNNUMBERED,
         .procname = "xpc",
         .mode = 0555,
         .child = xpc_sys_xpc_dir},
        {}
};
static struct ctl_table_header *xpc_sysctl;
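/*
 * Note (illustrative, not in the original source): given the nesting of the
 * tables above, registering xpc_sys_dir yields three runtime tunables which
 * an administrator could adjust from a shell, e.g.:
 *
 *      echo 5  > /proc/sys/xpc/hb/hb_interval
 *      echo 20 > /proc/sys/xpc/hb/hb_check_interval
 *      echo 60 > /proc/sys/xpc/disengage_timelimit
 *
 * Each write is range-checked by proc_dointvec_minmax() against the
 * extra1/extra2 bounds declared in the corresponding entry.
 */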
/* non-zero if any remote partition disengage was timed out */
int xpc_disengage_timedout;

/* #of activate IRQs received and not yet processed */
int xpc_activate_IRQ_rcvd;
DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock);

/* IRQ handler notifies this wait queue on receipt of an IRQ */
DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);

static unsigned long xpc_hb_check_timeout;
static struct timer_list xpc_hb_timer;
void *xpc_heartbeating_to_mask;

/* notification that the xpc_hb_checker thread has exited */
static DECLARE_COMPLETION(xpc_hb_checker_exited);

/* notification that the xpc_discovery thread has exited */
static DECLARE_COMPLETION(xpc_discovery_exited);

static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);

static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
static struct notifier_block xpc_reboot_notifier = {
        .notifier_call = xpc_system_reboot,
};

static int xpc_system_die(struct notifier_block *, unsigned long, void *);
static struct notifier_block xpc_die_notifier = {
        .notifier_call = xpc_system_die,
};
int (*xpc_setup_partitions_sn) (void);
enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie,
                                                  unsigned long *rp_pa,
                                                  size_t *len);
int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *rp);
void (*xpc_heartbeat_init) (void);
void (*xpc_heartbeat_exit) (void);
void (*xpc_increment_heartbeat) (void);
void (*xpc_offline_heartbeat) (void);
void (*xpc_online_heartbeat) (void);
enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part);

enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);

void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch);
u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part);
enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch);
void (*xpc_teardown_msg_structures) (struct xpc_channel *ch);
void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number);
int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *ch);
void *(*xpc_get_deliverable_payload) (struct xpc_channel *ch);

void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp,
                                          unsigned long remote_rp_pa,
                                          int nasid);
void (*xpc_request_partition_reactivation) (struct xpc_partition *part);
void (*xpc_request_partition_deactivation) (struct xpc_partition *part);
void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part);
void (*xpc_process_activate_IRQ_rcvd) (void);
enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *part);
void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *part);

void (*xpc_indicate_partition_engaged) (struct xpc_partition *part);
int (*xpc_partition_engaged) (short partid);
int (*xpc_any_partition_engaged) (void);
void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part);
void (*xpc_assume_partition_disengaged) (short partid);

void (*xpc_send_chctl_closerequest) (struct xpc_channel *ch,
                                     unsigned long *irq_flags);
void (*xpc_send_chctl_closereply) (struct xpc_channel *ch,
                                   unsigned long *irq_flags);
void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch,
                                    unsigned long *irq_flags);
void (*xpc_send_chctl_openreply) (struct xpc_channel *ch,
                                  unsigned long *irq_flags);

void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch,
                                     unsigned long msgqueue_pa);

enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags,
                                    void *payload, u16 payload_size,
                                    u8 notify_type, xpc_notify_func func,
                                    void *key);
void (*xpc_received_payload) (struct xpc_channel *ch, void *payload);
/*
 * Timer function to enforce the timelimit on the partition disengage.
 */
static void
xpc_timeout_partition_disengage(unsigned long data)
{
        struct xpc_partition *part = (struct xpc_partition *)data;

        DBUG_ON(time_is_after_jiffies(part->disengage_timeout));

        (void)xpc_partition_disengaged(part);

        DBUG_ON(part->disengage_timeout != 0);
        DBUG_ON(xpc_partition_engaged(XPC_PARTID(part)));
}
/*
 * Timer to produce the heartbeat. The timer structure's function is
 * already set when this is initially called. A tunable is used to
 * specify when the next timeout should occur.
 */
static void
xpc_hb_beater(unsigned long dummy)
{
        xpc_increment_heartbeat();

        if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
                wake_up_interruptible(&xpc_activate_IRQ_wq);

        xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
        add_timer(&xpc_hb_timer);
}

static void
xpc_start_hb_beater(void)
{
        xpc_heartbeat_init();
        init_timer(&xpc_hb_timer);
        xpc_hb_timer.function = xpc_hb_beater;
        xpc_hb_beater(0);
}

static void
xpc_stop_hb_beater(void)
{
        del_timer_sync(&xpc_hb_timer);
        xpc_heartbeat_exit();
}
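/*
 * Note (illustrative, not in the original source): xpc_hb_beater() is a
 * self-rearming timer; each run schedules the next one xpc_hb_interval
 * seconds out by converting the tunable to ticks. For example, with a
 * hypothetical xpc_hb_interval of 5 and HZ of 250 (both are
 * configuration-dependent), each pass sets
 *
 *      xpc_hb_timer.expires = jiffies + (5 * 250);     -- 1250 ticks ahead
 *
 * xpc_start_hb_beater() primes the cycle by calling xpc_hb_beater(0)
 * directly, and del_timer_sync() in xpc_stop_hb_beater() breaks it.
 */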
/*
 * At periodic intervals, scan through all active partitions and ensure
 * their heartbeat is still active. If not, the partition is deactivated.
 */
static void
xpc_check_remote_hb(void)
{
        struct xpc_partition *part;
        short partid;
        enum xp_retval ret;

        for (partid = 0; partid < xp_max_npartitions; partid++) {

                if (xpc_exiting)
                        break;

                if (partid == xp_partition_id)
                        continue;

                part = &xpc_partitions[partid];

                if (part->act_state == XPC_P_AS_INACTIVE ||
                    part->act_state == XPC_P_AS_DEACTIVATING) {
                        continue;
                }

                ret = xpc_get_remote_heartbeat(part);
                if (ret != xpSuccess)
                        XPC_DEACTIVATE_PARTITION(part, ret);
        }
}
/*
 * This thread is responsible for nearly all of the partition
 * activation/deactivation.
 */
static int
xpc_hb_checker(void *ignore)
{
        int force_IRQ = 0;

        /* this thread was marked active by xpc_hb_init() */

        set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU));

        /* set our heartbeating to other partitions into motion */
        xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
        xpc_start_hb_beater();

        while (!xpc_exiting) {

                dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
                        "been received\n",
                        (int)(xpc_hb_check_timeout - jiffies),
                        xpc_activate_IRQ_rcvd);

                /* checking of remote heartbeats is skewed by IRQ handling */
                if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
                        xpc_hb_check_timeout = jiffies +
                            (xpc_hb_check_interval * HZ);

                        dev_dbg(xpc_part, "checking remote heartbeats\n");
                        xpc_check_remote_hb();

                        /*
                         * On sn2 we need to periodically recheck to ensure no
                         * IRQ/amo pairs have been missed.
                         */
                        if (is_shub())
                                force_IRQ = 1;
                }

                /* check for outstanding IRQs */
                if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) {
                        force_IRQ = 0;
                        dev_dbg(xpc_part, "processing activate IRQs "
                                "received\n");
                        xpc_process_activate_IRQ_rcvd();
                }

                /* wait for IRQ or timeout */
                (void)wait_event_interruptible(xpc_activate_IRQ_wq,
                                               (time_is_before_eq_jiffies(
                                                xpc_hb_check_timeout) ||
                                                xpc_activate_IRQ_rcvd > 0 ||
                                                xpc_exiting));
        }

        xpc_stop_hb_beater();

        dev_dbg(xpc_part, "heartbeat checker is exiting\n");

        /* mark this thread as having exited */
        complete(&xpc_hb_checker_exited);
        return 0;
}
/*
 * This thread will attempt to discover other partitions to activate
 * based on info provided by SAL. This new thread is short lived and
 * will exit once discovery is complete.
 */
static int
xpc_initiate_discovery(void *ignore)
{
        xpc_discovery();

        dev_dbg(xpc_part, "discovery thread is exiting\n");

        /* mark this thread as having exited */
        complete(&xpc_discovery_exited);
        return 0;
}
/*
 * The first kthread assigned to a newly activated partition is the one
 * created by XPC HB with which it calls xpc_activating(). XPC hangs on to
 * that kthread until the partition is brought down, at which time that kthread
 * returns back to XPC HB. (The return of that kthread will signify to XPC HB
 * that XPC has dismantled all communication infrastructure for the associated
 * partition.) This kthread becomes the channel manager for that partition.
 *
 * Each active partition has a channel manager, who, besides connecting and
 * disconnecting channels, will ensure that each of the partition's connected
 * channels has the required number of assigned kthreads to get the work done.
 */
static void
xpc_channel_mgr(struct xpc_partition *part)
{
        while (part->act_state != XPC_P_AS_DEACTIVATING ||
               atomic_read(&part->nchannels_active) > 0 ||
               !xpc_partition_disengaged(part)) {

                xpc_process_sent_chctl_flags(part);

                /*
                 * Wait until we've been requested to activate kthreads or
                 * all of the channel's message queues have been torn down or
                 * a signal is pending.
                 *
                 * channel_mgr_requests is set to 1 after the channel mgr is
                 * awakened. This prevents the channel mgr from making one
                 * pass through the loop for each request, since it services
                 * all outstanding requests in a single pass. It is set to 1
                 * rather than 0 so that other kthreads can tell that the
                 * channel mgr is running and won't bother trying to wake it
                 * up.
                 */
                atomic_dec(&part->channel_mgr_requests);
                (void)wait_event_interruptible(part->channel_mgr_wq,
                                (atomic_read(&part->channel_mgr_requests) > 0 ||
                                 part->chctl.all_flags != 0 ||
                                 (part->act_state == XPC_P_AS_DEACTIVATING &&
                                 atomic_read(&part->nchannels_active) == 0 &&
                                 xpc_partition_disengaged(part))));
                atomic_set(&part->channel_mgr_requests, 1);
        }
}
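/*
 * Illustrative counterpart (a sketch under assumptions, not a quote of the
 * real helper): xpc_wakeup_channel_mgr(), defined elsewhere in XPC, is
 * expected to be roughly the inverse of the wait above -- it bumps the
 * request count and, if the channel mgr had gone to sleep, wakes it:
 *
 *      if (atomic_inc_return(&part->channel_mgr_requests) == 1)
 *              wake_up(&part->channel_mgr_wq);
 *
 * The "== 1" test relies on the 1-means-running convention described in the
 * comment above: a result greater than 1 means the channel mgr is already
 * awake and will service this request in its current pass.
 */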
/*
 * Guarantee that the kzalloc'd memory is cacheline aligned.
 */
void *
xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
        /* see if kzalloc will give us cacheline aligned memory by default */
        *base = kzalloc(size, flags);
        if (*base == NULL)
                return NULL;

        if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
                return *base;

        kfree(*base);

        /* nope, we'll have to do it ourselves */
        *base = kzalloc(size + L1_CACHE_BYTES, flags);
        if (*base == NULL)
                return NULL;

        return (void *)L1_CACHE_ALIGN((u64)*base);
}
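/*
 * Usage note (illustrative, not in the original source): callers must keep
 * the *base cookie and pass it to kfree(), since the returned pointer may
 * have been bumped forward to the next cacheline boundary and is then not a
 * valid argument to kfree() itself. A hypothetical caller:
 *
 *      void *buf_base;
 *      void *buf = xpc_kzalloc_cacheline_aligned(1024, GFP_KERNEL,
 *                                                &buf_base);
 *      if (buf == NULL)
 *              return xpNoMemory;
 *      ...
 *      kfree(buf_base);
 *
 * xpc_setup_ch_structures() below follows exactly this pattern with
 * part->remote_openclose_args and part->remote_openclose_args_base.
 */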
/*
 * Setup the channel structures necessary to support XPartition Communication
 * between the specified remote partition and the local one.
 */
static enum xp_retval
xpc_setup_ch_structures(struct xpc_partition *part)
{
        enum xp_retval ret;
        int ch_number;
        struct xpc_channel *ch;
        short partid = XPC_PARTID(part);

        /*
         * Allocate all of the channel structures as a contiguous chunk of
         * memory.
         */
        DBUG_ON(part->channels != NULL);
        part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
                                 GFP_KERNEL);
        if (part->channels == NULL) {
                dev_err(xpc_chan, "can't get memory for channels\n");
                return xpNoMemory;
        }

        /* allocate the remote open and close args */

        part->remote_openclose_args =
            xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
                                          GFP_KERNEL, &part->
                                          remote_openclose_args_base);
        if (part->remote_openclose_args == NULL) {
                dev_err(xpc_chan, "can't get memory for remote connect args\n");
                ret = xpNoMemory;
                goto out_1;
        }

        part->chctl.all_flags = 0;
        spin_lock_init(&part->chctl_lock);

        atomic_set(&part->channel_mgr_requests, 1);
        init_waitqueue_head(&part->channel_mgr_wq);

        part->nchannels = XPC_MAX_NCHANNELS;

        atomic_set(&part->nchannels_active, 0);
        atomic_set(&part->nchannels_engaged, 0);

        for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
                ch = &part->channels[ch_number];

                ch->partid = partid;
                ch->number = ch_number;
                ch->flags = XPC_C_DISCONNECTED;

                atomic_set(&ch->kthreads_assigned, 0);
                atomic_set(&ch->kthreads_idle, 0);
                atomic_set(&ch->kthreads_active, 0);

                atomic_set(&ch->references, 0);
                atomic_set(&ch->n_to_notify, 0);

                spin_lock_init(&ch->lock);
                init_completion(&ch->wdisconnect_wait);

                atomic_set(&ch->n_on_msg_allocate_wq, 0);
                init_waitqueue_head(&ch->msg_allocate_wq);
                init_waitqueue_head(&ch->idle_wq);
        }

        ret = xpc_setup_ch_structures_sn(part);
        if (ret != xpSuccess)
                goto out_2;

        /*
         * With the setting of the partition setup_state to XPC_P_SS_SETUP,
         * we're declaring that this partition is ready to go.
         */
        part->setup_state = XPC_P_SS_SETUP;

        return xpSuccess;

        /* setup of ch structures failed */
out_2:
        kfree(part->remote_openclose_args_base);
        part->remote_openclose_args = NULL;
out_1:
        kfree(part->channels);
        part->channels = NULL;
        return ret;
}
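/*
 * Note (illustrative, not in the original source): the out_2/out_1 labels
 * above form the usual kernel goto-unwind ladder. Each label releases the
 * resource acquired after the previous one and control falls through, so a
 * failure late in setup (out_2) first frees the openclose args and then
 * falls into out_1 to free the channel array, leaving the partition exactly
 * as it was on entry.
 */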
/*
 * Teardown the channel structures necessary to support XPartition Communication
 * between the specified remote partition and the local one.
 */
static void
xpc_teardown_ch_structures(struct xpc_partition *part)
{
        DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
        DBUG_ON(atomic_read(&part->nchannels_active) != 0);

        /*
         * Make this partition inaccessible to local processes by marking it
         * as no longer setup. Then wait before proceeding with the teardown
         * until all existing references cease.
         */
        DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
        part->setup_state = XPC_P_SS_WTEARDOWN;

        wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));

        /* now we can begin tearing down the infrastructure */

        xpc_teardown_ch_structures_sn(part);

        kfree(part->remote_openclose_args_base);
        part->remote_openclose_args = NULL;
        kfree(part->channels);
        part->channels = NULL;

        part->setup_state = XPC_P_SS_TORNDOWN;
}
/*
 * When XPC HB determines that a partition has come up, it will create a new
 * kthread and that kthread will call this function to attempt to set up the
 * basic infrastructure used for Cross Partition Communication with the newly
 * upped partition.
 *
 * The kthread that was created by XPC HB and which set up the XPC
 * infrastructure will remain assigned to the partition, becoming the channel
 * manager for that partition until the partition is deactivating, at which
 * time the kthread will tear down the XPC infrastructure and then exit.
 */
static int
xpc_activating(void *__partid)
{
        short partid = (u64)__partid;
        struct xpc_partition *part = &xpc_partitions[partid];
        unsigned long irq_flags;

        DBUG_ON(partid < 0 || partid >= xp_max_npartitions);

        spin_lock_irqsave(&part->act_lock, irq_flags);

        if (part->act_state == XPC_P_AS_DEACTIVATING) {
                part->act_state = XPC_P_AS_INACTIVE;
                spin_unlock_irqrestore(&part->act_lock, irq_flags);
                part->remote_rp_pa = 0;
                return 0;
        }

        /* indicate the thread is activating */
        DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ);
        part->act_state = XPC_P_AS_ACTIVATING;

        XPC_SET_REASON(part, 0, 0);
        spin_unlock_irqrestore(&part->act_lock, irq_flags);

        dev_dbg(xpc_part, "activating partition %d\n", partid);

        xpc_allow_hb(partid);

        if (xpc_setup_ch_structures(part) == xpSuccess) {
                (void)xpc_part_ref(part);       /* this will always succeed */

                if (xpc_make_first_contact(part) == xpSuccess) {
                        xpc_mark_partition_active(part);
                        xpc_channel_mgr(part);
                        /* won't return until partition is deactivating */
                }

                xpc_part_deref(part);
                xpc_teardown_ch_structures(part);
        }

        xpc_disallow_hb(partid);
        xpc_mark_partition_inactive(part);

        if (part->reason == xpReactivating) {
                /* interrupting ourselves results in activating partition */
                xpc_request_partition_reactivation(part);
        }

        return 0;
}
void
xpc_activate_partition(struct xpc_partition *part)
{
        short partid = XPC_PARTID(part);
        unsigned long irq_flags;
        struct task_struct *kthread;

        spin_lock_irqsave(&part->act_lock, irq_flags);

        DBUG_ON(part->act_state != XPC_P_AS_INACTIVE);

        part->act_state = XPC_P_AS_ACTIVATION_REQ;
        XPC_SET_REASON(part, xpCloneKThread, __LINE__);

        spin_unlock_irqrestore(&part->act_lock, irq_flags);

        kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d",
                              partid);
        if (IS_ERR(kthread)) {
                spin_lock_irqsave(&part->act_lock, irq_flags);
                part->act_state = XPC_P_AS_INACTIVE;
                XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__);
                spin_unlock_irqrestore(&part->act_lock, irq_flags);
        }
}
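/*
 * Summary (illustrative, not in the original source) of the act_state
 * transitions visible in this file:
 *
 *      XPC_P_AS_INACTIVE
 *          -> XPC_P_AS_ACTIVATION_REQ   (xpc_activate_partition())
 *          -> XPC_P_AS_ACTIVATING       (xpc_activating())
 *          -> active, via xpc_mark_partition_active() after first contact
 *          -> XPC_P_AS_DEACTIVATING     (XPC_DEACTIVATE_PARTITION())
 *          -> XPC_P_AS_INACTIVE         (xpc_mark_partition_inactive())
 *
 * A failed kthread_run() above short-circuits the request straight back to
 * XPC_P_AS_INACTIVE with reason xpCloneKThreadFailed.
 */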
void
xpc_activate_kthreads(struct xpc_channel *ch, int needed)
{
        int idle = atomic_read(&ch->kthreads_idle);
        int assigned = atomic_read(&ch->kthreads_assigned);
        int wakeup;

        DBUG_ON(needed <= 0);

        if (idle > 0) {
                wakeup = (needed > idle) ? idle : needed;
                needed -= wakeup;

                dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
                        "channel=%d\n", wakeup, ch->partid, ch->number);

                /* only wakeup the requested number of kthreads */
                wake_up_nr(&ch->idle_wq, wakeup);
        }

        if (needed <= 0)
                return;

        if (needed + assigned > ch->kthreads_assigned_limit) {
                needed = ch->kthreads_assigned_limit - assigned;
                if (needed <= 0)
                        return;
        }

        dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
                needed, ch->partid, ch->number);

        xpc_create_kthreads(ch, needed, 0);
}
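/*
 * Worked example (illustrative, not in the original source): with
 * needed = 5, kthreads_idle = 2, kthreads_assigned = 6 and a hypothetical
 * kthreads_assigned_limit of 8, the function wakes the 2 idle kthreads
 * (needed drops to 3), then clamps the remainder to the limit
 * (8 - 6 = 2) and asks xpc_create_kthreads() for 2 new kthreads rather
 * than 3.
 */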
/*
 * This function is where XPC's kthreads wait for messages to deliver.
 */
static void
xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
{
        do {
                /* deliver messages to their intended recipients */

                while (xpc_n_of_deliverable_payloads(ch) > 0 &&
                       !(ch->flags & XPC_C_DISCONNECTING)) {
                        xpc_deliver_payload(ch);
                }

                if (atomic_inc_return(&ch->kthreads_idle) >
                    ch->kthreads_idle_limit) {
                        /* too many idle kthreads on this channel */
                        atomic_dec(&ch->kthreads_idle);
                        break;
                }

                dev_dbg(xpc_chan, "idle kthread calling "
                        "wait_event_interruptible_exclusive()\n");

                (void)wait_event_interruptible_exclusive(ch->idle_wq,
                                (xpc_n_of_deliverable_payloads(ch) > 0 ||
                                 (ch->flags & XPC_C_DISCONNECTING)));

                atomic_dec(&ch->kthreads_idle);

        } while (!(ch->flags & XPC_C_DISCONNECTING));
}
static int
xpc_kthread_start(void *args)
{
        short partid = XPC_UNPACK_ARG1(args);
        u16 ch_number = XPC_UNPACK_ARG2(args);
        struct xpc_partition *part = &xpc_partitions[partid];
        struct xpc_channel *ch;
        int n_needed;
        unsigned long irq_flags;

        dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
                partid, ch_number);

        ch = &part->channels[ch_number];

        if (!(ch->flags & XPC_C_DISCONNECTING)) {

                /* let registerer know that connection has been established */

                spin_lock_irqsave(&ch->lock, irq_flags);
                if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
                        ch->flags |= XPC_C_CONNECTEDCALLOUT;
                        spin_unlock_irqrestore(&ch->lock, irq_flags);

                        xpc_connected_callout(ch);

                        spin_lock_irqsave(&ch->lock, irq_flags);
                        ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
                        spin_unlock_irqrestore(&ch->lock, irq_flags);

                        /*
                         * It is possible that while the callout was being
                         * made the remote partition sent some messages.
                         * If that is the case, we may need to activate
                         * additional kthreads to help deliver them. We only
                         * need one less than total #of messages to deliver.
                         */
                        n_needed = xpc_n_of_deliverable_payloads(ch) - 1;
                        if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
                                xpc_activate_kthreads(ch, n_needed);

                } else {
                        spin_unlock_irqrestore(&ch->lock, irq_flags);
                }

                xpc_kthread_waitmsgs(part, ch);
        }

        /* let registerer know that connection is disconnecting */

        spin_lock_irqsave(&ch->lock, irq_flags);
        if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
            !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
                ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
                spin_unlock_irqrestore(&ch->lock, irq_flags);

                xpc_disconnect_callout(ch, xpDisconnecting);

                spin_lock_irqsave(&ch->lock, irq_flags);
                ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
        }
        spin_unlock_irqrestore(&ch->lock, irq_flags);

        if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
            atomic_dec_return(&part->nchannels_engaged) == 0) {
                xpc_indicate_partition_disengaged(part);
        }

        xpc_msgqueue_deref(ch);

        dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
                partid, ch_number);

        xpc_part_deref(part);
        return 0;
}
/*
 * For each partition that XPC has established communications with, there is
 * a minimum of one kernel thread assigned to perform any operation that
 * may potentially sleep or block (basically the callouts to the asynchronous
 * functions registered via xpc_connect()).
 *
 * Additional kthreads are created and destroyed by XPC as the workload
 * demands.
 *
 * A kthread is assigned to one of the active channels that exists for a given
 * partition.
 */
void
xpc_create_kthreads(struct xpc_channel *ch, int needed,
                    int ignore_disconnecting)
{
        unsigned long irq_flags;
        u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
        struct xpc_partition *part = &xpc_partitions[ch->partid];
        struct task_struct *kthread;

        while (needed-- > 0) {

                /*
                 * The following is done on behalf of the newly created
                 * kthread. That kthread is responsible for doing the
                 * counterpart to the following before it exits.
                 */
                if (ignore_disconnecting) {
                        if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
                                /* kthreads assigned had gone to zero */
                                BUG_ON(!(ch->flags &
                                         XPC_C_DISCONNECTINGCALLOUT_MADE));
                                break;
                        }

                } else if (ch->flags & XPC_C_DISCONNECTING) {
                        break;

                } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
                           atomic_inc_return(&part->nchannels_engaged) == 1) {
                        xpc_indicate_partition_engaged(part);
                }
                (void)xpc_part_ref(part);
                xpc_msgqueue_ref(ch);

                kthread = kthread_run(xpc_kthread_start, (void *)args,
                                      "xpc%02dc%d", ch->partid, ch->number);
                if (IS_ERR(kthread)) {
                        /* the fork failed */

                        /*
                         * NOTE: if (ignore_disconnecting &&
                         * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
                         * then we'll deadlock if all other kthreads assigned
                         * to this channel are blocked in the channel's
                         * registerer, because the only thing that will unblock
                         * them is the xpDisconnecting callout that this
                         * failed kthread_run() would have made.
                         */

                        if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
                            atomic_dec_return(&part->nchannels_engaged) == 0) {
                                xpc_indicate_partition_disengaged(part);
                        }
                        xpc_msgqueue_deref(ch);
                        xpc_part_deref(part);

                        if (atomic_read(&ch->kthreads_assigned) <
                            ch->kthreads_idle_limit) {
                                /*
                                 * Flag this as an error only if we have an
                                 * insufficient #of kthreads for the channel
                                 * to function.
                                 */
                                spin_lock_irqsave(&ch->lock, irq_flags);
                                XPC_DISCONNECT_CHANNEL(ch, xpLackOfResources,
                                                       &irq_flags);
                                spin_unlock_irqrestore(&ch->lock, irq_flags);
                        }
                        break;
                }
        }
}
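/*
 * Note on the args plumbing (a sketch under assumptions, not a quote of
 * xpc.h): XPC_PACK_ARGS() squeezes the (partid, ch_number) pair into the
 * single u64 that kthread_run() passes through a void *, and
 * XPC_UNPACK_ARG1()/XPC_UNPACK_ARG2() in xpc_kthread_start() recover them.
 * A plausible encoding is one value per 32-bit half:
 *
 *      pack:   args = ((u64)partid & 0xffffffff) |
 *                     (((u64)ch_number & 0xffffffff) << 32);
 *      unpack: partid    = (u64)args & 0xffffffff;
 *              ch_number = ((u64)args >> 32) & 0xffffffff;
 *
 * The exact macros live in xpc.h; only the round-trip property matters here.
 */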
void
xpc_disconnect_wait(int ch_number)
{
        unsigned long irq_flags;
        short partid;
        struct xpc_partition *part;
        struct xpc_channel *ch;
        int wakeup_channel_mgr;

        /* now wait for all callouts to the caller's function to cease */
        for (partid = 0; partid < xp_max_npartitions; partid++) {
                part = &xpc_partitions[partid];

                if (!xpc_part_ref(part))
                        continue;

                ch = &part->channels[ch_number];

                if (!(ch->flags & XPC_C_WDISCONNECT)) {
                        xpc_part_deref(part);
                        continue;
                }

                wait_for_completion(&ch->wdisconnect_wait);

                spin_lock_irqsave(&ch->lock, irq_flags);
                DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
                wakeup_channel_mgr = 0;

                if (ch->delayed_chctl_flags) {
                        if (part->act_state != XPC_P_AS_DEACTIVATING) {
                                spin_lock(&part->chctl_lock);
                                part->chctl.flags[ch->number] |=
                                    ch->delayed_chctl_flags;
                                spin_unlock(&part->chctl_lock);
                                wakeup_channel_mgr = 1;
                        }
                        ch->delayed_chctl_flags = 0;
                }

                ch->flags &= ~XPC_C_WDISCONNECT;
                spin_unlock_irqrestore(&ch->lock, irq_flags);

                if (wakeup_channel_mgr)
                        xpc_wakeup_channel_mgr(part);

                xpc_part_deref(part);
        }
}
static int
xpc_setup_partitions(void)
{
        short partid;
        struct xpc_partition *part;

        xpc_partitions = kzalloc(sizeof(struct xpc_partition) *
                                 xp_max_npartitions, GFP_KERNEL);
        if (xpc_partitions == NULL) {
                dev_err(xpc_part, "can't get memory for partition structure\n");
                return -ENOMEM;
        }

        /*
         * The first few fields of each entry of xpc_partitions[] need to
         * be initialized now so that calls to xpc_connect() and
         * xpc_disconnect() can be made prior to the activation of any remote
         * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
         * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
         * PARTITION HAS BEEN ACTIVATED.
         */
        for (partid = 0; partid < xp_max_npartitions; partid++) {
                part = &xpc_partitions[partid];

                DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));

                part->activate_IRQ_rcvd = 0;
                spin_lock_init(&part->act_lock);
                part->act_state = XPC_P_AS_INACTIVE;
                XPC_SET_REASON(part, 0, 0);

                init_timer(&part->disengage_timer);
                part->disengage_timer.function =
                    xpc_timeout_partition_disengage;
                part->disengage_timer.data = (unsigned long)part;

                part->setup_state = XPC_P_SS_UNSET;
                init_waitqueue_head(&part->teardown_wq);
                atomic_set(&part->references, 0);
        }

        return xpc_setup_partitions_sn();
}

static void
xpc_teardown_partitions(void)
{
        kfree(xpc_partitions);
}
static void
xpc_do_exit(enum xp_retval reason)
{
        short partid;
        int active_part_count, printed_waiting_msg = 0;
        struct xpc_partition *part;
        unsigned long printmsg_time, disengage_timeout = 0;

        /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
        DBUG_ON(xpc_exiting == 1);

        /*
         * Let the heartbeat checker thread and the discovery thread
         * (if one is running) know that they should exit. Also wake up
         * the heartbeat checker thread in case it's sleeping.
         */
        xpc_exiting = 1;
        wake_up_interruptible(&xpc_activate_IRQ_wq);

        /* wait for the discovery thread to exit */
        wait_for_completion(&xpc_discovery_exited);

        /* wait for the heartbeat checker thread to exit */
        wait_for_completion(&xpc_hb_checker_exited);

        /* sleep for 1/3 of a second or so */
        (void)msleep_interruptible(300);

        /* wait for all partitions to become inactive */

        printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
        xpc_disengage_timedout = 0;

        do {
                active_part_count = 0;

                for (partid = 0; partid < xp_max_npartitions; partid++) {
                        part = &xpc_partitions[partid];

                        if (xpc_partition_disengaged(part) &&
                            part->act_state == XPC_P_AS_INACTIVE) {
                                continue;
                        }

                        active_part_count++;

                        XPC_DEACTIVATE_PARTITION(part, reason);

                        if (part->disengage_timeout > disengage_timeout)
                                disengage_timeout = part->disengage_timeout;
                }

                if (xpc_any_partition_engaged()) {
                        if (time_is_before_jiffies(printmsg_time)) {
                                dev_info(xpc_part, "waiting for remote "
                                         "partitions to deactivate, timeout in "
                                         "%ld seconds\n", (disengage_timeout -
                                         jiffies) / HZ);
                                printmsg_time = jiffies +
                                    (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
                                printed_waiting_msg = 1;
                        }

                } else if (active_part_count > 0) {
                        if (printed_waiting_msg) {
                                dev_info(xpc_part, "waiting for local partition"
                                         " to deactivate\n");
                                printed_waiting_msg = 0;
                        }

                } else {
                        if (!xpc_disengage_timedout) {
                                dev_info(xpc_part, "all partitions have "
                                         "deactivated\n");
                        }
                        break;
                }

                /* sleep for 1/3 of a second or so */
                (void)msleep_interruptible(300);

        } while (1);

        DBUG_ON(xpc_any_partition_engaged());
        DBUG_ON(xpc_any_hbs_allowed() != 0);

        xpc_teardown_rsvd_page();

        if (reason == xpUnloading) {
                (void)unregister_die_notifier(&xpc_die_notifier);
                (void)unregister_reboot_notifier(&xpc_reboot_notifier);
        }

        /* clear the interface to XPC's functions */
        xpc_clear_interface();

        if (xpc_sysctl)
                unregister_sysctl_table(xpc_sysctl);

        xpc_teardown_partitions();

        if (is_shub())
                xpc_exit_sn2();
        else
                xpc_exit_uv();
}
/*
 * This function is called when the system is being rebooted.
 */
static int
xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
{
        enum xp_retval reason;

        switch (event) {
        case SYS_RESTART:
                reason = xpSystemReboot;
                break;
        case SYS_HALT:
                reason = xpSystemHalt;
                break;
        case SYS_POWER_OFF:
                reason = xpSystemPoweroff;
                break;
        default:
                reason = xpSystemGoingDown;
        }

        xpc_do_exit(reason);
        return NOTIFY_DONE;
}
/*
 * Notify other partitions to deactivate from us by first disengaging from all
 * references to our memory.
 */
static void
xpc_die_deactivate(void)
{
        struct xpc_partition *part;
        short partid;
        int any_engaged;
        long keep_waiting;
        long wait_to_print;

        /* keep xpc_hb_checker thread from doing anything (just in case) */
        xpc_exiting = 1;

        xpc_disallow_all_hbs();         /* indicate we're deactivated */

        for (partid = 0; partid < xp_max_npartitions; partid++) {
                part = &xpc_partitions[partid];

                if (xpc_partition_engaged(partid) ||
                    part->act_state != XPC_P_AS_INACTIVE) {
                        xpc_request_partition_deactivation(part);
                        xpc_indicate_partition_disengaged(part);
                }
        }

        /*
         * Though we requested that all other partitions deactivate from us,
         * we only wait until they've all disengaged or we've reached the
         * defined timelimit.
         *
         * Given that one iteration through the following while-loop takes
         * approximately 200 microseconds, calculate the #of loops to take
         * before bailing and the #of loops before printing a waiting message.
         */
        keep_waiting = xpc_disengage_timelimit * 1000 * 5;
        wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5;

        while (1) {
                any_engaged = xpc_any_partition_engaged();
                if (!any_engaged) {
                        dev_info(xpc_part, "all partitions have deactivated\n");
                        break;
                }

                if (!keep_waiting--) {
                        for (partid = 0; partid < xp_max_npartitions;
                             partid++) {
                                if (xpc_partition_engaged(partid)) {
                                        dev_info(xpc_part, "deactivate from "
                                                 "remote partition %d timed "
                                                 "out\n", partid);
                                }
                        }
                        break;
                }

                if (!wait_to_print--) {
                        dev_info(xpc_part, "waiting for remote partitions to "
                                 "deactivate, timeout in %ld seconds\n",
                                 keep_waiting / (1000 * 5));
                        wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL *
                            1000 * 5;
                }

                udelay(200);
        }
}
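/*
 * Arithmetic note (illustrative, not in the original source): at roughly
 * 200 microseconds per iteration the loop above runs about 5000 times per
 * second, hence the "* 1000 * 5" scaling. For example, a hypothetical
 * xpc_disengage_timelimit of 90 seconds yields
 * keep_waiting = 90 * 1000 * 5 = 450000 iterations before XPC gives up,
 * and keep_waiting / (1000 * 5) converts the remaining count back into
 * seconds for the waiting message.
 */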
/*
 * This function is called when the system is being restarted or halted due
 * to some sort of system failure. If this is the case we need to notify the
 * other partitions to disengage from all references to our memory.
 * This function can also be called when our heartbeat may need to be
 * offlined for a time. In this case we need to notify other partitions to
 * not worry about the lack of a heartbeat.
 */
static int
xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
{
#ifdef CONFIG_IA64              /* !!! temporary kludge */
        switch (event) {
        case DIE_MACHINE_RESTART:
        case DIE_MACHINE_HALT:
                xpc_die_deactivate();
                break;

        case DIE_KDEBUG_ENTER:
                /* Should lack of heartbeat be ignored by other partitions? */
                if (!xpc_kdebug_ignore)
                        break;

                /* fall through */
        case DIE_MCA_MONARCH_ENTER:
        case DIE_INIT_MONARCH_ENTER:
                xpc_offline_heartbeat();
                break;

        case DIE_KDEBUG_LEAVE:
                /* Is lack of heartbeat being ignored by other partitions? */
                if (!xpc_kdebug_ignore)
                        break;

                /* fall through */
        case DIE_MCA_MONARCH_LEAVE:
        case DIE_INIT_MONARCH_LEAVE:
                xpc_online_heartbeat();
                break;
        }
#else
        xpc_die_deactivate();
#endif

        return NOTIFY_DONE;
}
int __init
xpc_init(void)
{
        int ret;
        struct task_struct *kthread;

        snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
        snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");

        if (is_shub()) {
                /*
                 * The ia64-sn2 architecture supports at most 64 partitions,
                 * and the inability to unregister remote amos restricts us
                 * further to supporting exactly 64 partitions on this
                 * architecture, no less.
                 */
                if (xp_max_npartitions != 64) {
                        dev_err(xpc_part, "max #of partitions not set to 64\n");
                        ret = -EINVAL;
                } else {
                        ret = xpc_init_sn2();
                }

        } else if (is_uv()) {
                ret = xpc_init_uv();

        } else {
                ret = -ENODEV;
        }

        if (ret != 0)
                return ret;

        ret = xpc_setup_partitions();
        if (ret != 0) {
                dev_err(xpc_part, "can't get memory for partition structure\n");
                goto out_1;
        }

        xpc_sysctl = register_sysctl_table(xpc_sys_dir);

        /*
         * Fill the partition reserved page with the information needed by
         * other partitions to discover we are alive and establish initial
         * communications.
         */
        ret = xpc_setup_rsvd_page();
        if (ret != 0) {
                dev_err(xpc_part, "can't setup our reserved page\n");
                goto out_2;
        }

        /* add ourselves to the reboot_notifier_list */
        ret = register_reboot_notifier(&xpc_reboot_notifier);
        if (ret != 0)
                dev_warn(xpc_part, "can't register reboot notifier\n");

        /* add ourselves to the die_notifier list */
        ret = register_die_notifier(&xpc_die_notifier);
        if (ret != 0)
                dev_warn(xpc_part, "can't register die notifier\n");

        /*
         * The real work-horse behind xpc. This processes incoming
         * interrupts and monitors remote heartbeats.
         */
        kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
        if (IS_ERR(kthread)) {
                dev_err(xpc_part, "failed while forking hb check thread\n");
                ret = -EBUSY;
                goto out_3;
        }

        /*
         * Startup a thread that will attempt to discover other partitions to
         * activate based on info provided by SAL. This new thread is short
         * lived and will exit once discovery is complete.
         */
        kthread = kthread_run(xpc_initiate_discovery, NULL,
                              XPC_DISCOVERY_THREAD_NAME);
        if (IS_ERR(kthread)) {
                dev_err(xpc_part, "failed while forking discovery thread\n");

                /* mark this new thread as a non-starter */
                complete(&xpc_discovery_exited);

                xpc_do_exit(xpUnloading);
                return -EBUSY;
        }

        /* set the interface to point at XPC's functions */
        xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
                          xpc_initiate_send, xpc_initiate_send_notify,
                          xpc_initiate_received, xpc_initiate_partid_to_nasids);

        return 0;

        /* initialization was not successful */
out_3:
        xpc_teardown_rsvd_page();

        (void)unregister_die_notifier(&xpc_die_notifier);
        (void)unregister_reboot_notifier(&xpc_reboot_notifier);
out_2:
        if (xpc_sysctl)
                unregister_sysctl_table(xpc_sysctl);
        xpc_teardown_partitions();
out_1:
        if (is_shub())
                xpc_exit_sn2();
        else
                xpc_exit_uv();
        return ret;
}
module_init(xpc_init);

void __exit
xpc_exit(void)
{
        xpc_do_exit(xpUnloading);
}

module_exit(xpc_exit);
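/*
 * Usage example (illustrative, not in the original source): the module
 * parameters declared below can be supplied at load time, e.g.
 *
 *      modprobe xpc xpc_hb_interval=5 xpc_hb_check_interval=20 \
 *                   xpc_kdebug_ignore=1
 *
 * The permission argument of 0 in each module_param() call means the values
 * are not exposed under /sys/module/xpc/parameters/; the heartbeat and
 * disengage tunables remain adjustable at runtime through the sysctl
 * entries registered from xpc_sys_dir in xpc_init().
 */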
MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
MODULE_LICENSE("GPL");

module_param(xpc_hb_interval, int, 0);
MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
                 "heartbeat increments.");

module_param(xpc_hb_check_interval, int, 0);
MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
                 "heartbeat checks.");

module_param(xpc_disengage_timelimit, int, 0);
MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait "
                 "for disengage to complete.");

module_param(xpc_kdebug_ignore, int, 0);
MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
                 "other partitions when dropping into kdebug.");