xpc_partition.c 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098
  1. /*
  2. * This file is subject to the terms and conditions of the GNU General Public
  3. * License. See the file "COPYING" in the main directory of this archive
  4. * for more details.
  5. *
  6. * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
  7. */
  8. /*
  9. * Cross Partition Communication (XPC) partition support.
  10. *
  11. * This is the part of XPC that detects the presence/absence of
  12. * other partitions. It provides a heartbeat and monitors the
  13. * heartbeats of other partitions.
  14. *
  15. */
  16. #include <linux/kernel.h>
  17. #include <linux/sysctl.h>
  18. #include <linux/cache.h>
  19. #include <linux/mmzone.h>
  20. #include <linux/nodemask.h>
  21. #include <asm/sn/intr.h>
  22. #include <asm/sn/sn_sal.h>
  23. #include <asm/sn/nodepda.h>
  24. #include <asm/sn/addrs.h>
  25. #include "xpc.h"
  26. /* XPC is exiting flag */
  27. int xpc_exiting;
  28. /* SH_IPI_ACCESS shub register value on startup */
  29. static u64 xpc_sh1_IPI_access;
  30. static u64 xpc_sh2_IPI_access0;
  31. static u64 xpc_sh2_IPI_access1;
  32. static u64 xpc_sh2_IPI_access2;
  33. static u64 xpc_sh2_IPI_access3;
  34. /* original protection values for each node */
  35. u64 xpc_prot_vec[MAX_NUMNODES];
  36. /* this partition's reserved page pointers */
  37. struct xpc_rsvd_page *xpc_rsvd_page;
  38. static u64 *xpc_part_nasids;
  39. static u64 *xpc_mach_nasids;
  40. /* >>> next two variables should be 'xpc_' if they remain here */
  41. static int xp_sizeof_nasid_mask; /* actual size in bytes of nasid mask */
  42. int xp_nasid_mask_words; /* actual size in words of nasid mask */
  43. struct xpc_partition *xpc_partitions;
  44. /*
  45. * Generic buffer used to store a local copy of portions of a remote
  46. * partition's reserved page (either its header and part_nasids mask,
  47. * or its vars).
  48. */
  49. char *xpc_remote_copy_buffer;
  50. void *xpc_remote_copy_buffer_base;
  51. /*
  52. * Guarantee that the kmalloc'd memory is cacheline aligned.
  53. */
  54. void *
  55. xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
  56. {
  57. /* see if kmalloc will give us cachline aligned memory by default */
  58. *base = kmalloc(size, flags);
  59. if (*base == NULL)
  60. return NULL;
  61. if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
  62. return *base;
  63. kfree(*base);
  64. /* nope, we'll have to do it ourselves */
  65. *base = kmalloc(size + L1_CACHE_BYTES, flags);
  66. if (*base == NULL)
  67. return NULL;
  68. return (void *)L1_CACHE_ALIGN((u64)*base);
  69. }
  70. /*
  71. * Given a nasid, get the physical address of the partition's reserved page
  72. * for that nasid. This function returns 0 on any error.
  73. */
  74. static u64
  75. xpc_get_rsvd_page_pa(int nasid)
  76. {
  77. enum xp_retval ret;
  78. s64 status;
  79. u64 cookie = 0;
  80. u64 rp_pa = nasid; /* seed with nasid */
  81. u64 len = 0;
  82. u64 buf = buf;
  83. u64 buf_len = 0;
  84. void *buf_base = NULL;
  85. while (1) {
  86. status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
  87. &len);
  88. dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
  89. "0x%016lx, address=0x%016lx, len=0x%016lx\n",
  90. status, cookie, rp_pa, len);
  91. if (status != SALRET_MORE_PASSES)
  92. break;
  93. /* >>> L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
  94. if (L1_CACHE_ALIGN(len) > buf_len) {
  95. kfree(buf_base);
  96. buf_len = L1_CACHE_ALIGN(len);
  97. buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len,
  98. GFP_KERNEL,
  99. &buf_base);
  100. if (buf_base == NULL) {
  101. dev_err(xpc_part, "unable to kmalloc "
  102. "len=0x%016lx\n", buf_len);
  103. status = SALRET_ERROR;
  104. break;
  105. }
  106. }
  107. ret = xp_remote_memcpy((void *)buf, (void *)rp_pa, buf_len);
  108. if (ret != xpSuccess) {
  109. dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
  110. status = SALRET_ERROR;
  111. break;
  112. }
  113. }
  114. kfree(buf_base);
  115. if (status != SALRET_OK)
  116. rp_pa = 0;
  117. dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
  118. return rp_pa;
  119. }
  120. /*
  121. * Fill the partition reserved page with the information needed by
  122. * other partitions to discover we are alive and establish initial
  123. * communications.
  124. */
  125. struct xpc_rsvd_page *
  126. xpc_setup_rsvd_page(void)
  127. {
  128. struct xpc_rsvd_page *rp;
  129. u64 rp_pa;
  130. /* get the local reserved page's address */
  131. preempt_disable();
  132. rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
  133. preempt_enable();
  134. if (rp_pa == 0) {
  135. dev_err(xpc_part, "SAL failed to locate the reserved page\n");
  136. return NULL;
  137. }
  138. rp = (struct xpc_rsvd_page *)__va(rp_pa);
  139. if (rp->SAL_version < 3) {
  140. /* SAL_versions < 3 had a SAL_partid defined as a u8 */
  141. rp->SAL_partid &= 0xff;
  142. }
  143. BUG_ON(rp->SAL_partid != sn_partition_id);
  144. if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
  145. dev_err(xpc_part, "the reserved page's partid of %d is outside "
  146. "supported range (< 0 || >= %d)\n", rp->SAL_partid,
  147. xp_max_npartitions);
  148. return NULL;
  149. }
  150. rp->version = XPC_RP_VERSION;
  151. rp->max_npartitions = xp_max_npartitions;
  152. /* establish the actual sizes of the nasid masks */
  153. if (rp->SAL_version == 1) {
  154. /* SAL_version 1 didn't set the nasids_size field */
  155. rp->SAL_nasids_size = 128;
  156. }
  157. xp_sizeof_nasid_mask = rp->SAL_nasids_size;
  158. xp_nasid_mask_words = DIV_ROUND_UP(xp_sizeof_nasid_mask,
  159. BYTES_PER_WORD);
  160. /* setup the pointers to the various items in the reserved page */
  161. xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
  162. xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
  163. if (xpc_rsvd_page_init(rp) != xpSuccess)
  164. return NULL;
  165. /*
  166. * Set timestamp of when reserved page was setup by XPC.
  167. * This signifies to the remote partition that our reserved
  168. * page is initialized.
  169. */
  170. rp->stamp = CURRENT_TIME;
  171. return rp;
  172. }
  173. /*
  174. * Change protections to allow IPI operations (and AMO operations on
  175. * Shub 1.1 systems).
  176. */
  177. void
  178. xpc_allow_IPI_ops(void)
  179. {
  180. int node;
  181. int nasid;
  182. /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
  183. if (is_shub2()) {
  184. xpc_sh2_IPI_access0 =
  185. (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
  186. xpc_sh2_IPI_access1 =
  187. (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
  188. xpc_sh2_IPI_access2 =
  189. (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
  190. xpc_sh2_IPI_access3 =
  191. (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
  192. for_each_online_node(node) {
  193. nasid = cnodeid_to_nasid(node);
  194. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
  195. -1UL);
  196. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
  197. -1UL);
  198. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
  199. -1UL);
  200. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
  201. -1UL);
  202. }
  203. } else {
  204. xpc_sh1_IPI_access =
  205. (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
  206. for_each_online_node(node) {
  207. nasid = cnodeid_to_nasid(node);
  208. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
  209. -1UL);
  210. /*
  211. * Since the BIST collides with memory operations on
  212. * SHUB 1.1 sn_change_memprotect() cannot be used.
  213. */
  214. if (enable_shub_wars_1_1()) {
  215. /* open up everything */
  216. xpc_prot_vec[node] = (u64)HUB_L((u64 *)
  217. GLOBAL_MMR_ADDR
  218. (nasid,
  219. SH1_MD_DQLP_MMR_DIR_PRIVEC0));
  220. HUB_S((u64 *)
  221. GLOBAL_MMR_ADDR(nasid,
  222. SH1_MD_DQLP_MMR_DIR_PRIVEC0),
  223. -1UL);
  224. HUB_S((u64 *)
  225. GLOBAL_MMR_ADDR(nasid,
  226. SH1_MD_DQRP_MMR_DIR_PRIVEC0),
  227. -1UL);
  228. }
  229. }
  230. }
  231. }
  232. /*
  233. * Restrict protections to disallow IPI operations (and AMO operations on
  234. * Shub 1.1 systems).
  235. */
  236. void
  237. xpc_restrict_IPI_ops(void)
  238. {
  239. int node;
  240. int nasid;
  241. /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
  242. if (is_shub2()) {
  243. for_each_online_node(node) {
  244. nasid = cnodeid_to_nasid(node);
  245. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
  246. xpc_sh2_IPI_access0);
  247. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
  248. xpc_sh2_IPI_access1);
  249. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
  250. xpc_sh2_IPI_access2);
  251. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
  252. xpc_sh2_IPI_access3);
  253. }
  254. } else {
  255. for_each_online_node(node) {
  256. nasid = cnodeid_to_nasid(node);
  257. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
  258. xpc_sh1_IPI_access);
  259. if (enable_shub_wars_1_1()) {
  260. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
  261. SH1_MD_DQLP_MMR_DIR_PRIVEC0),
  262. xpc_prot_vec[node]);
  263. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
  264. SH1_MD_DQRP_MMR_DIR_PRIVEC0),
  265. xpc_prot_vec[node]);
  266. }
  267. }
  268. }
  269. }
  270. /*
  271. * At periodic intervals, scan through all active partitions and ensure
  272. * their heartbeat is still active. If not, the partition is deactivated.
  273. */
  274. void
  275. xpc_check_remote_hb(void)
  276. {
  277. struct xpc_vars *remote_vars;
  278. struct xpc_partition *part;
  279. short partid;
  280. enum xp_retval ret;
  281. remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
  282. for (partid = 0; partid < xp_max_npartitions; partid++) {
  283. if (xpc_exiting)
  284. break;
  285. if (partid == sn_partition_id)
  286. continue;
  287. part = &xpc_partitions[partid];
  288. if (part->act_state == XPC_P_INACTIVE ||
  289. part->act_state == XPC_P_DEACTIVATING) {
  290. continue;
  291. }
  292. /* pull the remote_hb cache line */
  293. ret = xp_remote_memcpy(remote_vars,
  294. (void *)part->remote_vars_pa,
  295. XPC_RP_VARS_SIZE);
  296. if (ret != xpSuccess) {
  297. XPC_DEACTIVATE_PARTITION(part, ret);
  298. continue;
  299. }
  300. dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
  301. " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
  302. partid, remote_vars->heartbeat, part->last_heartbeat,
  303. remote_vars->heartbeat_offline,
  304. remote_vars->heartbeating_to_mask);
  305. if (((remote_vars->heartbeat == part->last_heartbeat) &&
  306. (remote_vars->heartbeat_offline == 0)) ||
  307. !xpc_hb_allowed(sn_partition_id, remote_vars)) {
  308. XPC_DEACTIVATE_PARTITION(part, xpNoHeartbeat);
  309. continue;
  310. }
  311. part->last_heartbeat = remote_vars->heartbeat;
  312. }
  313. }
  314. /*
  315. * Get a copy of a portion of the remote partition's rsvd page.
  316. *
  317. * remote_rp points to a buffer that is cacheline aligned for BTE copies and
  318. * is large enough to contain a copy of their reserved page header and
  319. * part_nasids mask.
  320. */
  321. static enum xp_retval
  322. xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
  323. struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
  324. {
  325. int i;
  326. enum xp_retval ret;
  327. /* get the reserved page's physical address */
  328. *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
  329. if (*remote_rp_pa == 0)
  330. return xpNoRsvdPageAddr;
  331. /* pull over the reserved page header and part_nasids mask */
  332. ret = xp_remote_memcpy(remote_rp, (void *)*remote_rp_pa,
  333. XPC_RP_HEADER_SIZE + xp_sizeof_nasid_mask);
  334. if (ret != xpSuccess)
  335. return ret;
  336. if (discovered_nasids != NULL) {
  337. u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
  338. for (i = 0; i < xp_nasid_mask_words; i++)
  339. discovered_nasids[i] |= remote_part_nasids[i];
  340. }
  341. /* check that the partid is valid and is for another partition */
  342. if (remote_rp->SAL_partid < 0 ||
  343. remote_rp->SAL_partid >= xp_max_npartitions) {
  344. return xpInvalidPartid;
  345. }
  346. if (remote_rp->SAL_partid == sn_partition_id)
  347. return xpLocalPartid;
  348. /* see if the rest of the reserved page has been set up by XPC */
  349. if (timespec_equal(&remote_rp->stamp, &ZERO_STAMP))
  350. return xpRsvdPageNotSet;
  351. if (XPC_VERSION_MAJOR(remote_rp->version) !=
  352. XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
  353. return xpBadVersion;
  354. }
  355. if (remote_rp->max_npartitions <= sn_partition_id)
  356. return xpInvalidPartid;
  357. return xpSuccess;
  358. }
  359. /*
  360. * Get a copy of the remote partition's XPC variables from the reserved page.
  361. *
  362. * remote_vars points to a buffer that is cacheline aligned for BTE copies and
  363. * assumed to be of size XPC_RP_VARS_SIZE.
  364. */
  365. static enum xp_retval
  366. xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
  367. {
  368. enum xp_retval ret;
  369. if (remote_vars_pa == 0)
  370. return xpVarsNotSet;
  371. /* pull over the cross partition variables */
  372. ret = xp_remote_memcpy(remote_vars, (void *)remote_vars_pa,
  373. XPC_RP_VARS_SIZE);
  374. if (ret != xpSuccess)
  375. return ret;
  376. if (XPC_VERSION_MAJOR(remote_vars->version) !=
  377. XPC_VERSION_MAJOR(XPC_V_VERSION)) {
  378. return xpBadVersion;
  379. }
  380. return xpSuccess;
  381. }
  382. /*
  383. * Update the remote partition's info.
  384. */
  385. static void
  386. xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
  387. struct timespec *remote_rp_stamp, u64 remote_rp_pa,
  388. u64 remote_vars_pa, struct xpc_vars *remote_vars)
  389. {
  390. part->remote_rp_version = remote_rp_version;
  391. dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n",
  392. part->remote_rp_version);
  393. part->remote_rp_stamp = *remote_rp_stamp;
  394. dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
  395. part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
  396. part->remote_rp_pa = remote_rp_pa;
  397. dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
  398. part->remote_vars_pa = remote_vars_pa;
  399. dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
  400. part->remote_vars_pa);
  401. part->last_heartbeat = remote_vars->heartbeat;
  402. dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
  403. part->last_heartbeat);
  404. part->remote_vars_part_pa = remote_vars->vars_part_pa;
  405. dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
  406. part->remote_vars_part_pa);
  407. part->remote_act_nasid = remote_vars->act_nasid;
  408. dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
  409. part->remote_act_nasid);
  410. part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
  411. dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
  412. part->remote_act_phys_cpuid);
  413. part->remote_amos_page_pa = remote_vars->amos_page_pa;
  414. dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
  415. part->remote_amos_page_pa);
  416. part->remote_vars_version = remote_vars->version;
  417. dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
  418. part->remote_vars_version);
  419. }
  420. /*
  421. * Prior code has determined the nasid which generated an IPI. Inspect
  422. * that nasid to determine if its partition needs to be activated or
  423. * deactivated.
  424. *
  425. * A partition is consider "awaiting activation" if our partition
  426. * flags indicate it is not active and it has a heartbeat. A
  427. * partition is considered "awaiting deactivation" if our partition
  428. * flags indicate it is active but it has no heartbeat or it is not
  429. * sending its heartbeat to us.
  430. *
  431. * To determine the heartbeat, the remote nasid must have a properly
  432. * initialized reserved page.
  433. */
  434. static void
  435. xpc_identify_act_IRQ_req(int nasid)
  436. {
  437. struct xpc_rsvd_page *remote_rp;
  438. struct xpc_vars *remote_vars;
  439. u64 remote_rp_pa;
  440. u64 remote_vars_pa;
  441. int remote_rp_version;
  442. int reactivate = 0;
  443. int stamp_diff;
  444. struct timespec remote_rp_stamp = { 0, 0 }; /*>>> ZERO_STAMP */
  445. short partid;
  446. struct xpc_partition *part;
  447. enum xp_retval ret;
  448. /* pull over the reserved page structure */
  449. remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;
  450. ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
  451. if (ret != xpSuccess) {
  452. dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
  453. "which sent interrupt, reason=%d\n", nasid, ret);
  454. return;
  455. }
  456. remote_vars_pa = remote_rp->sn.vars_pa;
  457. remote_rp_version = remote_rp->version;
  458. if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
  459. remote_rp_stamp = remote_rp->stamp;
  460. partid = remote_rp->SAL_partid;
  461. part = &xpc_partitions[partid];
  462. /* pull over the cross partition variables */
  463. remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
  464. ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
  465. if (ret != xpSuccess) {
  466. dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
  467. "which sent interrupt, reason=%d\n", nasid, ret);
  468. XPC_DEACTIVATE_PARTITION(part, ret);
  469. return;
  470. }
  471. part->act_IRQ_rcvd++;
  472. dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
  473. "%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd,
  474. remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
  475. if (xpc_partition_disengaged(part) &&
  476. part->act_state == XPC_P_INACTIVE) {
  477. xpc_update_partition_info(part, remote_rp_version,
  478. &remote_rp_stamp, remote_rp_pa,
  479. remote_vars_pa, remote_vars);
  480. if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
  481. if (xpc_partition_disengage_requested(1UL << partid)) {
  482. /*
  483. * Other side is waiting on us to disengage,
  484. * even though we already have.
  485. */
  486. return;
  487. }
  488. } else {
  489. /* other side doesn't support disengage requests */
  490. xpc_clear_partition_disengage_request(1UL << partid);
  491. }
  492. xpc_activate_partition(part);
  493. return;
  494. }
  495. DBUG_ON(part->remote_rp_version == 0);
  496. DBUG_ON(part->remote_vars_version == 0);
  497. if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
  498. DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
  499. remote_vars_version));
  500. if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
  501. DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
  502. version));
  503. /* see if the other side rebooted */
  504. if (part->remote_amos_page_pa ==
  505. remote_vars->amos_page_pa &&
  506. xpc_hb_allowed(sn_partition_id, remote_vars)) {
  507. /* doesn't look that way, so ignore the IPI */
  508. return;
  509. }
  510. }
  511. /*
  512. * Other side rebooted and previous XPC didn't support the
  513. * disengage request, so we don't need to do anything special.
  514. */
  515. xpc_update_partition_info(part, remote_rp_version,
  516. &remote_rp_stamp, remote_rp_pa,
  517. remote_vars_pa, remote_vars);
  518. part->reactivate_nasid = nasid;
  519. XPC_DEACTIVATE_PARTITION(part, xpReactivating);
  520. return;
  521. }
  522. DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
  523. if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
  524. DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
  525. /*
  526. * Other side rebooted and previous XPC did support the
  527. * disengage request, but the new one doesn't.
  528. */
  529. xpc_clear_partition_engaged(1UL << partid);
  530. xpc_clear_partition_disengage_request(1UL << partid);
  531. xpc_update_partition_info(part, remote_rp_version,
  532. &remote_rp_stamp, remote_rp_pa,
  533. remote_vars_pa, remote_vars);
  534. reactivate = 1;
  535. } else {
  536. DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
  537. stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
  538. &remote_rp_stamp);
  539. if (stamp_diff != 0) {
  540. DBUG_ON(stamp_diff >= 0);
  541. /*
  542. * Other side rebooted and the previous XPC did support
  543. * the disengage request, as does the new one.
  544. */
  545. DBUG_ON(xpc_partition_engaged(1UL << partid));
  546. DBUG_ON(xpc_partition_disengage_requested(1UL <<
  547. partid));
  548. xpc_update_partition_info(part, remote_rp_version,
  549. &remote_rp_stamp,
  550. remote_rp_pa, remote_vars_pa,
  551. remote_vars);
  552. reactivate = 1;
  553. }
  554. }
  555. if (part->disengage_request_timeout > 0 &&
  556. !xpc_partition_disengaged(part)) {
  557. /* still waiting on other side to disengage from us */
  558. return;
  559. }
  560. if (reactivate) {
  561. part->reactivate_nasid = nasid;
  562. XPC_DEACTIVATE_PARTITION(part, xpReactivating);
  563. } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
  564. xpc_partition_disengage_requested(1UL << partid)) {
  565. XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
  566. }
  567. }
  568. /*
  569. * Loop through the activation AMO variables and process any bits
  570. * which are set. Each bit indicates a nasid sending a partition
  571. * activation or deactivation request.
  572. *
  573. * Return #of IRQs detected.
  574. */
  575. int
  576. xpc_identify_act_IRQ_sender(void)
  577. {
  578. int word, bit;
  579. u64 nasid_mask;
  580. u64 nasid; /* remote nasid */
  581. int n_IRQs_detected = 0;
  582. AMO_t *act_amos;
  583. act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
  584. /* scan through act AMO variable looking for non-zero entries */
  585. for (word = 0; word < xp_nasid_mask_words; word++) {
  586. if (xpc_exiting)
  587. break;
  588. nasid_mask = xpc_IPI_receive(&act_amos[word]);
  589. if (nasid_mask == 0) {
  590. /* no IRQs from nasids in this variable */
  591. continue;
  592. }
  593. dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
  594. nasid_mask);
  595. /*
  596. * If this nasid has been added to the machine since
  597. * our partition was reset, this will retain the
  598. * remote nasid in our reserved pages machine mask.
  599. * This is used in the event of module reload.
  600. */
  601. xpc_mach_nasids[word] |= nasid_mask;
  602. /* locate the nasid(s) which sent interrupts */
  603. for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
  604. if (nasid_mask & (1UL << bit)) {
  605. n_IRQs_detected++;
  606. nasid = XPC_NASID_FROM_W_B(word, bit);
  607. dev_dbg(xpc_part, "interrupt from nasid %ld\n",
  608. nasid);
  609. xpc_identify_act_IRQ_req(nasid);
  610. }
  611. }
  612. }
  613. return n_IRQs_detected;
  614. }
  615. /*
  616. * See if the other side has responded to a partition disengage request
  617. * from us.
  618. */
  619. int
  620. xpc_partition_disengaged(struct xpc_partition *part)
  621. {
  622. short partid = XPC_PARTID(part);
  623. int disengaged;
  624. disengaged = (xpc_partition_engaged(1UL << partid) == 0);
  625. if (part->disengage_request_timeout) {
  626. if (!disengaged) {
  627. if (time_before(jiffies,
  628. part->disengage_request_timeout)) {
  629. /* timelimit hasn't been reached yet */
  630. return 0;
  631. }
  632. /*
  633. * Other side hasn't responded to our disengage
  634. * request in a timely fashion, so assume it's dead.
  635. */
  636. dev_info(xpc_part, "disengage from remote partition %d "
  637. "timed out\n", partid);
  638. xpc_disengage_request_timedout = 1;
  639. xpc_clear_partition_engaged(1UL << partid);
  640. disengaged = 1;
  641. }
  642. part->disengage_request_timeout = 0;
  643. /* cancel the timer function, provided it's not us */
  644. if (!in_interrupt()) {
  645. del_singleshot_timer_sync(&part->
  646. disengage_request_timer);
  647. }
  648. DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
  649. part->act_state != XPC_P_INACTIVE);
  650. if (part->act_state != XPC_P_INACTIVE)
  651. xpc_wakeup_channel_mgr(part);
  652. if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version))
  653. xpc_cancel_partition_disengage_request(part);
  654. }
  655. return disengaged;
  656. }
  657. /*
  658. * Mark specified partition as active.
  659. */
  660. enum xp_retval
  661. xpc_mark_partition_active(struct xpc_partition *part)
  662. {
  663. unsigned long irq_flags;
  664. enum xp_retval ret;
  665. dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
  666. spin_lock_irqsave(&part->act_lock, irq_flags);
  667. if (part->act_state == XPC_P_ACTIVATING) {
  668. part->act_state = XPC_P_ACTIVE;
  669. ret = xpSuccess;
  670. } else {
  671. DBUG_ON(part->reason == xpSuccess);
  672. ret = part->reason;
  673. }
  674. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  675. return ret;
  676. }
  677. /*
  678. * Notify XPC that the partition is down.
  679. */
  680. void
  681. xpc_deactivate_partition(const int line, struct xpc_partition *part,
  682. enum xp_retval reason)
  683. {
  684. unsigned long irq_flags;
  685. spin_lock_irqsave(&part->act_lock, irq_flags);
  686. if (part->act_state == XPC_P_INACTIVE) {
  687. XPC_SET_REASON(part, reason, line);
  688. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  689. if (reason == xpReactivating) {
  690. /* we interrupt ourselves to reactivate partition */
  691. xpc_IPI_send_reactivate(part);
  692. }
  693. return;
  694. }
  695. if (part->act_state == XPC_P_DEACTIVATING) {
  696. if ((part->reason == xpUnloading && reason != xpUnloading) ||
  697. reason == xpReactivating) {
  698. XPC_SET_REASON(part, reason, line);
  699. }
  700. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  701. return;
  702. }
  703. part->act_state = XPC_P_DEACTIVATING;
  704. XPC_SET_REASON(part, reason, line);
  705. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  706. if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
  707. xpc_request_partition_disengage(part);
  708. xpc_IPI_send_disengage(part);
  709. /* set a timelimit on the disengage request */
  710. part->disengage_request_timeout = jiffies +
  711. (xpc_disengage_request_timelimit * HZ);
  712. part->disengage_request_timer.expires =
  713. part->disengage_request_timeout;
  714. add_timer(&part->disengage_request_timer);
  715. }
  716. dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
  717. XPC_PARTID(part), reason);
  718. xpc_partition_going_down(part, reason);
  719. }
  720. /*
  721. * Mark specified partition as inactive.
  722. */
  723. void
  724. xpc_mark_partition_inactive(struct xpc_partition *part)
  725. {
  726. unsigned long irq_flags;
  727. dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
  728. XPC_PARTID(part));
  729. spin_lock_irqsave(&part->act_lock, irq_flags);
  730. part->act_state = XPC_P_INACTIVE;
  731. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  732. part->remote_rp_pa = 0;
  733. }
  734. /*
  735. * SAL has provided a partition and machine mask. The partition mask
  736. * contains a bit for each even nasid in our partition. The machine
  737. * mask contains a bit for each even nasid in the entire machine.
  738. *
  739. * Using those two bit arrays, we can determine which nasids are
  740. * known in the machine. Each should also have a reserved page
  741. * initialized if they are available for partitioning.
  742. */
  743. void
  744. xpc_discovery(void)
  745. {
  746. void *remote_rp_base;
  747. struct xpc_rsvd_page *remote_rp;
  748. struct xpc_vars *remote_vars;
  749. u64 remote_rp_pa;
  750. u64 remote_vars_pa;
  751. int region;
  752. int region_size;
  753. int max_regions;
  754. int nasid;
  755. struct xpc_rsvd_page *rp;
  756. short partid;
  757. struct xpc_partition *part;
  758. u64 *discovered_nasids;
  759. enum xp_retval ret;
  760. remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
  761. xp_sizeof_nasid_mask,
  762. GFP_KERNEL, &remote_rp_base);
  763. if (remote_rp == NULL)
  764. return;
  765. remote_vars = (struct xpc_vars *)remote_rp;
  766. discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
  767. GFP_KERNEL);
  768. if (discovered_nasids == NULL) {
  769. kfree(remote_rp_base);
  770. return;
  771. }
  772. rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
  773. /*
  774. * The term 'region' in this context refers to the minimum number of
  775. * nodes that can comprise an access protection grouping. The access
  776. * protection is in regards to memory, IOI and IPI.
  777. */
  778. max_regions = 64;
  779. region_size = sn_region_size;
  780. switch (region_size) {
  781. case 128:
  782. max_regions *= 2;
  783. case 64:
  784. max_regions *= 2;
  785. case 32:
  786. max_regions *= 2;
  787. region_size = 16;
  788. DBUG_ON(!is_shub2());
  789. }
  790. for (region = 0; region < max_regions; region++) {
  791. if (xpc_exiting)
  792. break;
  793. dev_dbg(xpc_part, "searching region %d\n", region);
  794. for (nasid = (region * region_size * 2);
  795. nasid < ((region + 1) * region_size * 2); nasid += 2) {
  796. if (xpc_exiting)
  797. break;
  798. dev_dbg(xpc_part, "checking nasid %d\n", nasid);
  799. if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
  800. dev_dbg(xpc_part, "PROM indicates Nasid %d is "
  801. "part of the local partition; skipping "
  802. "region\n", nasid);
  803. break;
  804. }
  805. if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
  806. dev_dbg(xpc_part, "PROM indicates Nasid %d was "
  807. "not on Numa-Link network at reset\n",
  808. nasid);
  809. continue;
  810. }
  811. if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
  812. dev_dbg(xpc_part, "Nasid %d is part of a "
  813. "partition which was previously "
  814. "discovered\n", nasid);
  815. continue;
  816. }
  817. /* pull over the reserved page structure */
  818. ret = xpc_get_remote_rp(nasid, discovered_nasids,
  819. remote_rp, &remote_rp_pa);
  820. if (ret != xpSuccess) {
  821. dev_dbg(xpc_part, "unable to get reserved page "
  822. "from nasid %d, reason=%d\n", nasid,
  823. ret);
  824. if (ret == xpLocalPartid)
  825. break;
  826. continue;
  827. }
  828. remote_vars_pa = remote_rp->sn.vars_pa;
  829. partid = remote_rp->SAL_partid;
  830. part = &xpc_partitions[partid];
  831. /* pull over the cross partition variables */
  832. ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
  833. if (ret != xpSuccess) {
  834. dev_dbg(xpc_part, "unable to get XPC variables "
  835. "from nasid %d, reason=%d\n", nasid,
  836. ret);
  837. XPC_DEACTIVATE_PARTITION(part, ret);
  838. continue;
  839. }
  840. if (part->act_state != XPC_P_INACTIVE) {
  841. dev_dbg(xpc_part, "partition %d on nasid %d is "
  842. "already activating\n", partid, nasid);
  843. break;
  844. }
  845. /*
  846. * Register the remote partition's AMOs with SAL so it
  847. * can handle and cleanup errors within that address
  848. * range should the remote partition go down. We don't
  849. * unregister this range because it is difficult to
  850. * tell when outstanding writes to the remote partition
  851. * are finished and thus when it is thus safe to
  852. * unregister. This should not result in wasted space
  853. * in the SAL xp_addr_region table because we should
  854. * get the same page for remote_act_amos_pa after
  855. * module reloads and system reboots.
  856. */
  857. if (sn_register_xp_addr_region
  858. (remote_vars->amos_page_pa, PAGE_SIZE, 1) < 0) {
  859. dev_dbg(xpc_part,
  860. "partition %d failed to "
  861. "register xp_addr region 0x%016lx\n",
  862. partid, remote_vars->amos_page_pa);
  863. XPC_SET_REASON(part, xpPhysAddrRegFailed,
  864. __LINE__);
  865. break;
  866. }
  867. /*
  868. * The remote nasid is valid and available.
  869. * Send an interrupt to that nasid to notify
  870. * it that we are ready to begin activation.
  871. */
  872. dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
  873. "nasid %d, phys_cpuid 0x%x\n",
  874. remote_vars->amos_page_pa,
  875. remote_vars->act_nasid,
  876. remote_vars->act_phys_cpuid);
  877. if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
  878. version)) {
  879. part->remote_amos_page_pa =
  880. remote_vars->amos_page_pa;
  881. xpc_mark_partition_disengaged(part);
  882. xpc_cancel_partition_disengage_request(part);
  883. }
  884. xpc_IPI_send_activate(remote_vars);
  885. }
  886. }
  887. kfree(discovered_nasids);
  888. kfree(remote_rp_base);
  889. }
  890. /*
  891. * Given a partid, get the nasids owned by that partition from the
  892. * remote partition's reserved page.
  893. */
  894. enum xp_retval
  895. xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
  896. {
  897. struct xpc_partition *part;
  898. u64 part_nasid_pa;
  899. part = &xpc_partitions[partid];
  900. if (part->remote_rp_pa == 0)
  901. return xpPartitionDown;
  902. memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
  903. part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
  904. return xp_remote_memcpy(nasid_mask, (void *)part_nasid_pa,
  905. xp_sizeof_nasid_mask);
  906. }