xpc_partition.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239
  /*
   * This file is subject to the terms and conditions of the GNU General Public
   * License. See the file "COPYING" in the main directory of this archive
   * for more details.
   *
   * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
   */
  /*
   * Cross Partition Communication (XPC) partition support.
   *
   * This is the part of XPC that detects the presence/absence of
   * other partitions. It provides a heartbeat and monitors the
   * heartbeats of other partitions.
   *
   */
  #include <linux/kernel.h>
  #include <linux/sysctl.h>
  #include <linux/cache.h>
  #include <linux/jiffies.h>
  #include <linux/mmzone.h>
  #include <linux/nodemask.h>
  #include <asm/uncached.h>
  #include <asm/sn/bte.h>
  #include <asm/sn/intr.h>
  #include <asm/sn/sn_sal.h>
  #include <asm/sn/nodepda.h>
  #include <asm/sn/addrs.h>
  #include <asm/sn/xpc.h>
  28. /* XPC is exiting flag */
  29. int xpc_exiting;
  30. /* SH_IPI_ACCESS shub register value on startup */
  31. static u64 xpc_sh1_IPI_access;
  32. static u64 xpc_sh2_IPI_access0;
  33. static u64 xpc_sh2_IPI_access1;
  34. static u64 xpc_sh2_IPI_access2;
  35. static u64 xpc_sh2_IPI_access3;
  36. /* original protection values for each node */
  37. u64 xpc_prot_vec[MAX_NUMNODES];
  38. /* this partition's reserved page pointers */
  39. struct xpc_rsvd_page *xpc_rsvd_page;
  40. static u64 *xpc_part_nasids;
  41. static u64 *xpc_mach_nasids;
  42. struct xpc_vars *xpc_vars;
  43. struct xpc_vars_part *xpc_vars_part;
  44. static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */
  45. static int xp_nasid_mask_words; /* actual size in words of nasid mask */
  46. /*
  47. * For performance reasons, each entry of xpc_partitions[] is cacheline
  48. * aligned. And xpc_partitions[] is padded with an additional entry at the
  49. * end so that the last legitimate entry doesn't share its cacheline with
  50. * another variable.
  51. */
  52. struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
  53. /*
  54. * Generic buffer used to store a local copy of portions of a remote
  55. * partition's reserved page (either its header and part_nasids mask,
  56. * or its vars).
  57. */
  58. char *xpc_remote_copy_buffer;
  59. void *xpc_remote_copy_buffer_base;
  60. /*
  61. * Guarantee that the kmalloc'd memory is cacheline aligned.
  62. */
  63. void *
  64. xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
  65. {
  66. /* see if kmalloc will give us cachline aligned memory by default */
  67. *base = kmalloc(size, flags);
  68. if (*base == NULL) {
  69. return NULL;
  70. }
  71. if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
  72. return *base;
  73. }
  74. kfree(*base);
  75. /* nope, we'll have to do it ourselves */
  76. *base = kmalloc(size + L1_CACHE_BYTES, flags);
  77. if (*base == NULL) {
  78. return NULL;
  79. }
  80. return (void *) L1_CACHE_ALIGN((u64) *base);
  81. }
  82. /*
  83. * Given a nasid, get the physical address of the partition's reserved page
  84. * for that nasid. This function returns 0 on any error.
  85. */
  86. static u64
  87. xpc_get_rsvd_page_pa(int nasid)
  88. {
  89. bte_result_t bte_res;
  90. s64 status;
  91. u64 cookie = 0;
  92. u64 rp_pa = nasid; /* seed with nasid */
  93. u64 len = 0;
  94. u64 buf = buf;
  95. u64 buf_len = 0;
  96. void *buf_base = NULL;
  97. while (1) {
  98. status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
  99. &len);
  100. dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
  101. "0x%016lx, address=0x%016lx, len=0x%016lx\n",
  102. status, cookie, rp_pa, len);
  103. if (status != SALRET_MORE_PASSES) {
  104. break;
  105. }
  106. if (L1_CACHE_ALIGN(len) > buf_len) {
  107. kfree(buf_base);
  108. buf_len = L1_CACHE_ALIGN(len);
  109. buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len,
  110. GFP_KERNEL, &buf_base);
  111. if (buf_base == NULL) {
  112. dev_err(xpc_part, "unable to kmalloc "
  113. "len=0x%016lx\n", buf_len);
  114. status = SALRET_ERROR;
  115. break;
  116. }
  117. }
  118. bte_res = xp_bte_copy(rp_pa, buf, buf_len,
  119. (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  120. if (bte_res != BTE_SUCCESS) {
  121. dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
  122. status = SALRET_ERROR;
  123. break;
  124. }
  125. }
  126. kfree(buf_base);
  127. if (status != SALRET_OK) {
  128. rp_pa = 0;
  129. }
  130. dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
  131. return rp_pa;
  132. }
  133. /*
  134. * Fill the partition reserved page with the information needed by
  135. * other partitions to discover we are alive and establish initial
  136. * communications.
  137. */
  138. struct xpc_rsvd_page *
  139. xpc_rsvd_page_init(void)
  140. {
  141. struct xpc_rsvd_page *rp;
  142. AMO_t *amos_page;
  143. u64 rp_pa, nasid_array = 0;
  144. int i, ret;
  145. /* get the local reserved page's address */
  146. preempt_disable();
  147. rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
  148. preempt_enable();
  149. if (rp_pa == 0) {
  150. dev_err(xpc_part, "SAL failed to locate the reserved page\n");
  151. return NULL;
  152. }
  153. rp = (struct xpc_rsvd_page *) __va(rp_pa);
  154. if (rp->partid != sn_partition_id) {
  155. dev_err(xpc_part, "the reserved page's partid of %d should be "
  156. "%d\n", rp->partid, sn_partition_id);
  157. return NULL;
  158. }
  159. rp->version = XPC_RP_VERSION;
  160. /* establish the actual sizes of the nasid masks */
  161. if (rp->SAL_version == 1) {
  162. /* SAL_version 1 didn't set the nasids_size field */
  163. rp->nasids_size = 128;
  164. }
  165. xp_nasid_mask_bytes = rp->nasids_size;
  166. xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
  167. /* setup the pointers to the various items in the reserved page */
  168. xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
  169. xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
  170. xpc_vars = XPC_RP_VARS(rp);
  171. xpc_vars_part = XPC_RP_VARS_PART(rp);
  172. /*
  173. * Before clearing xpc_vars, see if a page of AMOs had been previously
  174. * allocated. If not we'll need to allocate one and set permissions
  175. * so that cross-partition AMOs are allowed.
  176. *
  177. * The allocated AMO page needs MCA reporting to remain disabled after
  178. * XPC has unloaded. To make this work, we keep a copy of the pointer
  179. * to this page (i.e., amos_page) in the struct xpc_vars structure,
  180. * which is pointed to by the reserved page, and re-use that saved copy
  181. * on subsequent loads of XPC. This AMO page is never freed, and its
  182. * memory protections are never restricted.
  183. */
  184. if ((amos_page = xpc_vars->amos_page) == NULL) {
  185. amos_page = (AMO_t *) TO_AMO(uncached_alloc_page(0));
  186. if (amos_page == NULL) {
  187. dev_err(xpc_part, "can't allocate page of AMOs\n");
  188. return NULL;
  189. }
  190. /*
  191. * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems
  192. * when xpc_allow_IPI_ops() is called via xpc_hb_init().
  193. */
  194. if (!enable_shub_wars_1_1()) {
  195. ret = sn_change_memprotect(ia64_tpa((u64) amos_page),
  196. PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1,
  197. &nasid_array);
  198. if (ret != 0) {
  199. dev_err(xpc_part, "can't change memory "
  200. "protections\n");
  201. uncached_free_page(__IA64_UNCACHED_OFFSET |
  202. TO_PHYS((u64) amos_page));
  203. return NULL;
  204. }
  205. }
  206. } else if (!IS_AMO_ADDRESS((u64) amos_page)) {
  207. /*
  208. * EFI's XPBOOT can also set amos_page in the reserved page,
  209. * but it happens to leave it as an uncached physical address
  210. * and we need it to be an uncached virtual, so we'll have to
  211. * convert it.
  212. */
  213. if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) {
  214. dev_err(xpc_part, "previously used amos_page address "
  215. "is bad = 0x%p\n", (void *) amos_page);
  216. return NULL;
  217. }
  218. amos_page = (AMO_t *) TO_AMO((u64) amos_page);
  219. }
  220. /* clear xpc_vars */
  221. memset(xpc_vars, 0, sizeof(struct xpc_vars));
  222. xpc_vars->version = XPC_V_VERSION;
  223. xpc_vars->act_nasid = cpuid_to_nasid(0);
  224. xpc_vars->act_phys_cpuid = cpu_physical_id(0);
  225. xpc_vars->vars_part_pa = __pa(xpc_vars_part);
  226. xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page);
  227. xpc_vars->amos_page = amos_page; /* save for next load of XPC */
  228. /* clear xpc_vars_part */
  229. memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
  230. XP_MAX_PARTITIONS);
  231. /* initialize the activate IRQ related AMO variables */
  232. for (i = 0; i < xp_nasid_mask_words; i++) {
  233. (void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
  234. }
  235. /* initialize the engaged remote partitions related AMO variables */
  236. (void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
  237. (void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
  238. /* timestamp of when reserved page was setup by XPC */
  239. rp->stamp = CURRENT_TIME;
  240. /*
  241. * This signifies to the remote partition that our reserved
  242. * page is initialized.
  243. */
  244. rp->vars_pa = __pa(xpc_vars);
  245. return rp;
  246. }
  247. /*
  248. * Change protections to allow IPI operations (and AMO operations on
  249. * Shub 1.1 systems).
  250. */
  251. void
  252. xpc_allow_IPI_ops(void)
  253. {
  254. int node;
  255. int nasid;
  256. // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
  257. if (is_shub2()) {
  258. xpc_sh2_IPI_access0 =
  259. (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
  260. xpc_sh2_IPI_access1 =
  261. (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
  262. xpc_sh2_IPI_access2 =
  263. (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
  264. xpc_sh2_IPI_access3 =
  265. (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
  266. for_each_online_node(node) {
  267. nasid = cnodeid_to_nasid(node);
  268. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
  269. -1UL);
  270. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
  271. -1UL);
  272. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
  273. -1UL);
  274. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
  275. -1UL);
  276. }
  277. } else {
  278. xpc_sh1_IPI_access =
  279. (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
  280. for_each_online_node(node) {
  281. nasid = cnodeid_to_nasid(node);
  282. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
  283. -1UL);
  284. /*
  285. * Since the BIST collides with memory operations on
  286. * SHUB 1.1 sn_change_memprotect() cannot be used.
  287. */
  288. if (enable_shub_wars_1_1()) {
  289. /* open up everything */
  290. xpc_prot_vec[node] = (u64) HUB_L((u64 *)
  291. GLOBAL_MMR_ADDR(nasid,
  292. SH1_MD_DQLP_MMR_DIR_PRIVEC0));
  293. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
  294. SH1_MD_DQLP_MMR_DIR_PRIVEC0),
  295. -1UL);
  296. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
  297. SH1_MD_DQRP_MMR_DIR_PRIVEC0),
  298. -1UL);
  299. }
  300. }
  301. }
  302. }
  303. /*
  304. * Restrict protections to disallow IPI operations (and AMO operations on
  305. * Shub 1.1 systems).
  306. */
  307. void
  308. xpc_restrict_IPI_ops(void)
  309. {
  310. int node;
  311. int nasid;
  312. // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
  313. if (is_shub2()) {
  314. for_each_online_node(node) {
  315. nasid = cnodeid_to_nasid(node);
  316. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
  317. xpc_sh2_IPI_access0);
  318. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
  319. xpc_sh2_IPI_access1);
  320. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
  321. xpc_sh2_IPI_access2);
  322. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
  323. xpc_sh2_IPI_access3);
  324. }
  325. } else {
  326. for_each_online_node(node) {
  327. nasid = cnodeid_to_nasid(node);
  328. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
  329. xpc_sh1_IPI_access);
  330. if (enable_shub_wars_1_1()) {
  331. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
  332. SH1_MD_DQLP_MMR_DIR_PRIVEC0),
  333. xpc_prot_vec[node]);
  334. HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
  335. SH1_MD_DQRP_MMR_DIR_PRIVEC0),
  336. xpc_prot_vec[node]);
  337. }
  338. }
  339. }
  340. }
  341. /*
  342. * At periodic intervals, scan through all active partitions and ensure
  343. * their heartbeat is still active. If not, the partition is deactivated.
  344. */
  345. void
  346. xpc_check_remote_hb(void)
  347. {
  348. struct xpc_vars *remote_vars;
  349. struct xpc_partition *part;
  350. partid_t partid;
  351. bte_result_t bres;
  352. remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
  353. for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
  354. if (xpc_exiting) {
  355. break;
  356. }
  357. if (partid == sn_partition_id) {
  358. continue;
  359. }
  360. part = &xpc_partitions[partid];
  361. if (part->act_state == XPC_P_INACTIVE ||
  362. part->act_state == XPC_P_DEACTIVATING) {
  363. continue;
  364. }
  365. /* pull the remote_hb cache line */
  366. bres = xp_bte_copy(part->remote_vars_pa,
  367. (u64) remote_vars,
  368. XPC_RP_VARS_SIZE,
  369. (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  370. if (bres != BTE_SUCCESS) {
  371. XPC_DEACTIVATE_PARTITION(part,
  372. xpc_map_bte_errors(bres));
  373. continue;
  374. }
  375. dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
  376. " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
  377. partid, remote_vars->heartbeat, part->last_heartbeat,
  378. remote_vars->heartbeat_offline,
  379. remote_vars->heartbeating_to_mask);
  380. if (((remote_vars->heartbeat == part->last_heartbeat) &&
  381. (remote_vars->heartbeat_offline == 0)) ||
  382. !xpc_hb_allowed(sn_partition_id, remote_vars)) {
  383. XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
  384. continue;
  385. }
  386. part->last_heartbeat = remote_vars->heartbeat;
  387. }
  388. }
  389. /*
  390. * Get a copy of a portion of the remote partition's rsvd page.
  391. *
  392. * remote_rp points to a buffer that is cacheline aligned for BTE copies and
  393. * is large enough to contain a copy of their reserved page header and
  394. * part_nasids mask.
  395. */
  396. static enum xpc_retval
  397. xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
  398. struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
  399. {
  400. int bres, i;
  401. /* get the reserved page's physical address */
  402. *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
  403. if (*remote_rp_pa == 0) {
  404. return xpcNoRsvdPageAddr;
  405. }
  406. /* pull over the reserved page header and part_nasids mask */
  407. bres = xp_bte_copy(*remote_rp_pa, (u64) remote_rp,
  408. XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
  409. (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  410. if (bres != BTE_SUCCESS) {
  411. return xpc_map_bte_errors(bres);
  412. }
  413. if (discovered_nasids != NULL) {
  414. u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
  415. for (i = 0; i < xp_nasid_mask_words; i++) {
  416. discovered_nasids[i] |= remote_part_nasids[i];
  417. }
  418. }
  419. /* check that the partid is for another partition */
  420. if (remote_rp->partid < 1 ||
  421. remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
  422. return xpcInvalidPartid;
  423. }
  424. if (remote_rp->partid == sn_partition_id) {
  425. return xpcLocalPartid;
  426. }
  427. if (XPC_VERSION_MAJOR(remote_rp->version) !=
  428. XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
  429. return xpcBadVersion;
  430. }
  431. return xpcSuccess;
  432. }
  433. /*
  434. * Get a copy of the remote partition's XPC variables from the reserved page.
  435. *
  436. * remote_vars points to a buffer that is cacheline aligned for BTE copies and
  437. * assumed to be of size XPC_RP_VARS_SIZE.
  438. */
  439. static enum xpc_retval
  440. xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
  441. {
  442. int bres;
  443. if (remote_vars_pa == 0) {
  444. return xpcVarsNotSet;
  445. }
  446. /* pull over the cross partition variables */
  447. bres = xp_bte_copy(remote_vars_pa, (u64) remote_vars, XPC_RP_VARS_SIZE,
  448. (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  449. if (bres != BTE_SUCCESS) {
  450. return xpc_map_bte_errors(bres);
  451. }
  452. if (XPC_VERSION_MAJOR(remote_vars->version) !=
  453. XPC_VERSION_MAJOR(XPC_V_VERSION)) {
  454. return xpcBadVersion;
  455. }
  456. return xpcSuccess;
  457. }
  458. /*
  459. * Update the remote partition's info.
  460. */
  461. static void
  462. xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
  463. struct timespec *remote_rp_stamp, u64 remote_rp_pa,
  464. u64 remote_vars_pa, struct xpc_vars *remote_vars)
  465. {
  466. part->remote_rp_version = remote_rp_version;
  467. dev_dbg(xpc_part, " remote_rp_version = 0x%016lx\n",
  468. part->remote_rp_version);
  469. part->remote_rp_stamp = *remote_rp_stamp;
  470. dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
  471. part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
  472. part->remote_rp_pa = remote_rp_pa;
  473. dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
  474. part->remote_vars_pa = remote_vars_pa;
  475. dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
  476. part->remote_vars_pa);
  477. part->last_heartbeat = remote_vars->heartbeat;
  478. dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
  479. part->last_heartbeat);
  480. part->remote_vars_part_pa = remote_vars->vars_part_pa;
  481. dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
  482. part->remote_vars_part_pa);
  483. part->remote_act_nasid = remote_vars->act_nasid;
  484. dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
  485. part->remote_act_nasid);
  486. part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
  487. dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
  488. part->remote_act_phys_cpuid);
  489. part->remote_amos_page_pa = remote_vars->amos_page_pa;
  490. dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
  491. part->remote_amos_page_pa);
  492. part->remote_vars_version = remote_vars->version;
  493. dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
  494. part->remote_vars_version);
  495. }
  496. /*
  497. * Prior code has determined the nasid which generated an IPI. Inspect
  498. * that nasid to determine if its partition needs to be activated or
  499. * deactivated.
  500. *
  501. * A partition is consider "awaiting activation" if our partition
  502. * flags indicate it is not active and it has a heartbeat. A
  503. * partition is considered "awaiting deactivation" if our partition
  504. * flags indicate it is active but it has no heartbeat or it is not
  505. * sending its heartbeat to us.
  506. *
  507. * To determine the heartbeat, the remote nasid must have a properly
  508. * initialized reserved page.
  509. */
  510. static void
  511. xpc_identify_act_IRQ_req(int nasid)
  512. {
  513. struct xpc_rsvd_page *remote_rp;
  514. struct xpc_vars *remote_vars;
  515. u64 remote_rp_pa;
  516. u64 remote_vars_pa;
  517. int remote_rp_version;
  518. int reactivate = 0;
  519. int stamp_diff;
  520. struct timespec remote_rp_stamp = { 0, 0 };
  521. partid_t partid;
  522. struct xpc_partition *part;
  523. enum xpc_retval ret;
  524. /* pull over the reserved page structure */
  525. remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
  526. ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
  527. if (ret != xpcSuccess) {
  528. dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
  529. "which sent interrupt, reason=%d\n", nasid, ret);
  530. return;
  531. }
  532. remote_vars_pa = remote_rp->vars_pa;
  533. remote_rp_version = remote_rp->version;
  534. if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
  535. remote_rp_stamp = remote_rp->stamp;
  536. }
  537. partid = remote_rp->partid;
  538. part = &xpc_partitions[partid];
  539. /* pull over the cross partition variables */
  540. remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
  541. ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
  542. if (ret != xpcSuccess) {
  543. dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
  544. "which sent interrupt, reason=%d\n", nasid, ret);
  545. XPC_DEACTIVATE_PARTITION(part, ret);
  546. return;
  547. }
  548. part->act_IRQ_rcvd++;
  549. dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
  550. "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
  551. remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
  552. if (xpc_partition_disengaged(part) &&
  553. part->act_state == XPC_P_INACTIVE) {
  554. xpc_update_partition_info(part, remote_rp_version,
  555. &remote_rp_stamp, remote_rp_pa,
  556. remote_vars_pa, remote_vars);
  557. if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
  558. if (xpc_partition_disengage_requested(1UL << partid)) {
  559. /*
  560. * Other side is waiting on us to disengage,
  561. * even though we already have.
  562. */
  563. return;
  564. }
  565. } else {
  566. /* other side doesn't support disengage requests */
  567. xpc_clear_partition_disengage_request(1UL << partid);
  568. }
  569. xpc_activate_partition(part);
  570. return;
  571. }
  572. DBUG_ON(part->remote_rp_version == 0);
  573. DBUG_ON(part->remote_vars_version == 0);
  574. if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
  575. DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
  576. remote_vars_version));
  577. if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
  578. DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
  579. version));
  580. /* see if the other side rebooted */
  581. if (part->remote_amos_page_pa ==
  582. remote_vars->amos_page_pa &&
  583. xpc_hb_allowed(sn_partition_id,
  584. remote_vars)) {
  585. /* doesn't look that way, so ignore the IPI */
  586. return;
  587. }
  588. }
  589. /*
  590. * Other side rebooted and previous XPC didn't support the
  591. * disengage request, so we don't need to do anything special.
  592. */
  593. xpc_update_partition_info(part, remote_rp_version,
  594. &remote_rp_stamp, remote_rp_pa,
  595. remote_vars_pa, remote_vars);
  596. part->reactivate_nasid = nasid;
  597. XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
  598. return;
  599. }
  600. DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
  601. if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
  602. DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
  603. /*
  604. * Other side rebooted and previous XPC did support the
  605. * disengage request, but the new one doesn't.
  606. */
  607. xpc_clear_partition_engaged(1UL << partid);
  608. xpc_clear_partition_disengage_request(1UL << partid);
  609. xpc_update_partition_info(part, remote_rp_version,
  610. &remote_rp_stamp, remote_rp_pa,
  611. remote_vars_pa, remote_vars);
  612. reactivate = 1;
  613. } else {
  614. DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
  615. stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
  616. &remote_rp_stamp);
  617. if (stamp_diff != 0) {
  618. DBUG_ON(stamp_diff >= 0);
  619. /*
  620. * Other side rebooted and the previous XPC did support
  621. * the disengage request, as does the new one.
  622. */
  623. DBUG_ON(xpc_partition_engaged(1UL << partid));
  624. DBUG_ON(xpc_partition_disengage_requested(1UL <<
  625. partid));
  626. xpc_update_partition_info(part, remote_rp_version,
  627. &remote_rp_stamp, remote_rp_pa,
  628. remote_vars_pa, remote_vars);
  629. reactivate = 1;
  630. }
  631. }
  632. if (part->disengage_request_timeout > 0 &&
  633. !xpc_partition_disengaged(part)) {
  634. /* still waiting on other side to disengage from us */
  635. return;
  636. }
  637. if (reactivate) {
  638. part->reactivate_nasid = nasid;
  639. XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
  640. } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
  641. xpc_partition_disengage_requested(1UL << partid)) {
  642. XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
  643. }
  644. }
  645. /*
  646. * Loop through the activation AMO variables and process any bits
  647. * which are set. Each bit indicates a nasid sending a partition
  648. * activation or deactivation request.
  649. *
  650. * Return #of IRQs detected.
  651. */
  652. int
  653. xpc_identify_act_IRQ_sender(void)
  654. {
  655. int word, bit;
  656. u64 nasid_mask;
  657. u64 nasid; /* remote nasid */
  658. int n_IRQs_detected = 0;
  659. AMO_t *act_amos;
  660. act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
  661. /* scan through act AMO variable looking for non-zero entries */
  662. for (word = 0; word < xp_nasid_mask_words; word++) {
  663. if (xpc_exiting) {
  664. break;
  665. }
  666. nasid_mask = xpc_IPI_receive(&act_amos[word]);
  667. if (nasid_mask == 0) {
  668. /* no IRQs from nasids in this variable */
  669. continue;
  670. }
  671. dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
  672. nasid_mask);
  673. /*
  674. * If this nasid has been added to the machine since
  675. * our partition was reset, this will retain the
  676. * remote nasid in our reserved pages machine mask.
  677. * This is used in the event of module reload.
  678. */
  679. xpc_mach_nasids[word] |= nasid_mask;
  680. /* locate the nasid(s) which sent interrupts */
  681. for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
  682. if (nasid_mask & (1UL << bit)) {
  683. n_IRQs_detected++;
  684. nasid = XPC_NASID_FROM_W_B(word, bit);
  685. dev_dbg(xpc_part, "interrupt from nasid %ld\n",
  686. nasid);
  687. xpc_identify_act_IRQ_req(nasid);
  688. }
  689. }
  690. }
  691. return n_IRQs_detected;
  692. }
  693. /*
  694. * See if the other side has responded to a partition disengage request
  695. * from us.
  696. */
  697. int
  698. xpc_partition_disengaged(struct xpc_partition *part)
  699. {
  700. partid_t partid = XPC_PARTID(part);
  701. int disengaged;
  702. disengaged = (xpc_partition_engaged(1UL << partid) == 0);
  703. if (part->disengage_request_timeout) {
  704. if (!disengaged) {
  705. if (jiffies < part->disengage_request_timeout) {
  706. /* timelimit hasn't been reached yet */
  707. return 0;
  708. }
  709. /*
  710. * Other side hasn't responded to our disengage
  711. * request in a timely fashion, so assume it's dead.
  712. */
  713. dev_info(xpc_part, "disengage from remote partition %d "
  714. "timed out\n", partid);
  715. xpc_disengage_request_timedout = 1;
  716. xpc_clear_partition_engaged(1UL << partid);
  717. disengaged = 1;
  718. }
  719. part->disengage_request_timeout = 0;
  720. /* cancel the timer function, provided it's not us */
  721. if (!in_interrupt()) {
  722. del_singleshot_timer_sync(&part->
  723. disengage_request_timer);
  724. }
  725. DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
  726. part->act_state != XPC_P_INACTIVE);
  727. if (part->act_state != XPC_P_INACTIVE) {
  728. xpc_wakeup_channel_mgr(part);
  729. }
  730. if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
  731. xpc_cancel_partition_disengage_request(part);
  732. }
  733. }
  734. return disengaged;
  735. }
  736. /*
  737. * Mark specified partition as active.
  738. */
  739. enum xpc_retval
  740. xpc_mark_partition_active(struct xpc_partition *part)
  741. {
  742. unsigned long irq_flags;
  743. enum xpc_retval ret;
  744. dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
  745. spin_lock_irqsave(&part->act_lock, irq_flags);
  746. if (part->act_state == XPC_P_ACTIVATING) {
  747. part->act_state = XPC_P_ACTIVE;
  748. ret = xpcSuccess;
  749. } else {
  750. DBUG_ON(part->reason == xpcSuccess);
  751. ret = part->reason;
  752. }
  753. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  754. return ret;
  755. }
  756. /*
  757. * Notify XPC that the partition is down.
  758. */
  759. void
  760. xpc_deactivate_partition(const int line, struct xpc_partition *part,
  761. enum xpc_retval reason)
  762. {
  763. unsigned long irq_flags;
  764. spin_lock_irqsave(&part->act_lock, irq_flags);
  765. if (part->act_state == XPC_P_INACTIVE) {
  766. XPC_SET_REASON(part, reason, line);
  767. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  768. if (reason == xpcReactivating) {
  769. /* we interrupt ourselves to reactivate partition */
  770. xpc_IPI_send_reactivate(part);
  771. }
  772. return;
  773. }
  774. if (part->act_state == XPC_P_DEACTIVATING) {
  775. if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
  776. reason == xpcReactivating) {
  777. XPC_SET_REASON(part, reason, line);
  778. }
  779. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  780. return;
  781. }
  782. part->act_state = XPC_P_DEACTIVATING;
  783. XPC_SET_REASON(part, reason, line);
  784. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  785. if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
  786. xpc_request_partition_disengage(part);
  787. xpc_IPI_send_disengage(part);
  788. /* set a timelimit on the disengage request */
  789. part->disengage_request_timeout = jiffies +
  790. (xpc_disengage_request_timelimit * HZ);
  791. part->disengage_request_timer.expires =
  792. part->disengage_request_timeout;
  793. add_timer(&part->disengage_request_timer);
  794. }
  795. dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
  796. XPC_PARTID(part), reason);
  797. xpc_partition_going_down(part, reason);
  798. }
  799. /*
  800. * Mark specified partition as inactive.
  801. */
  802. void
  803. xpc_mark_partition_inactive(struct xpc_partition *part)
  804. {
  805. unsigned long irq_flags;
  806. dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
  807. XPC_PARTID(part));
  808. spin_lock_irqsave(&part->act_lock, irq_flags);
  809. part->act_state = XPC_P_INACTIVE;
  810. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  811. part->remote_rp_pa = 0;
  812. }
  813. /*
  814. * SAL has provided a partition and machine mask. The partition mask
  815. * contains a bit for each even nasid in our partition. The machine
  816. * mask contains a bit for each even nasid in the entire machine.
  817. *
  818. * Using those two bit arrays, we can determine which nasids are
  819. * known in the machine. Each should also have a reserved page
  820. * initialized if they are available for partitioning.
  821. */
  822. void
  823. xpc_discovery(void)
  824. {
  825. void *remote_rp_base;
  826. struct xpc_rsvd_page *remote_rp;
  827. struct xpc_vars *remote_vars;
  828. u64 remote_rp_pa;
  829. u64 remote_vars_pa;
  830. int region;
  831. int region_size;
  832. int max_regions;
  833. int nasid;
  834. struct xpc_rsvd_page *rp;
  835. partid_t partid;
  836. struct xpc_partition *part;
  837. u64 *discovered_nasids;
  838. enum xpc_retval ret;
  839. remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
  840. xp_nasid_mask_bytes,
  841. GFP_KERNEL, &remote_rp_base);
  842. if (remote_rp == NULL) {
  843. return;
  844. }
  845. remote_vars = (struct xpc_vars *) remote_rp;
  846. discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
  847. GFP_KERNEL);
  848. if (discovered_nasids == NULL) {
  849. kfree(remote_rp_base);
  850. return;
  851. }
  852. rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
  853. /*
  854. * The term 'region' in this context refers to the minimum number of
  855. * nodes that can comprise an access protection grouping. The access
  856. * protection is in regards to memory, IOI and IPI.
  857. */
  858. max_regions = 64;
  859. region_size = sn_region_size;
  860. switch (region_size) {
  861. case 128:
  862. max_regions *= 2;
  863. case 64:
  864. max_regions *= 2;
  865. case 32:
  866. max_regions *= 2;
  867. region_size = 16;
  868. DBUG_ON(!is_shub2());
  869. }
  870. for (region = 0; region < max_regions; region++) {
  871. if ((volatile int) xpc_exiting) {
  872. break;
  873. }
  874. dev_dbg(xpc_part, "searching region %d\n", region);
  875. for (nasid = (region * region_size * 2);
  876. nasid < ((region + 1) * region_size * 2);
  877. nasid += 2) {
  878. if ((volatile int) xpc_exiting) {
  879. break;
  880. }
  881. dev_dbg(xpc_part, "checking nasid %d\n", nasid);
  882. if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
  883. dev_dbg(xpc_part, "PROM indicates Nasid %d is "
  884. "part of the local partition; skipping "
  885. "region\n", nasid);
  886. break;
  887. }
  888. if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
  889. dev_dbg(xpc_part, "PROM indicates Nasid %d was "
  890. "not on Numa-Link network at reset\n",
  891. nasid);
  892. continue;
  893. }
  894. if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
  895. dev_dbg(xpc_part, "Nasid %d is part of a "
  896. "partition which was previously "
  897. "discovered\n", nasid);
  898. continue;
  899. }
  900. /* pull over the reserved page structure */
  901. ret = xpc_get_remote_rp(nasid, discovered_nasids,
  902. remote_rp, &remote_rp_pa);
  903. if (ret != xpcSuccess) {
  904. dev_dbg(xpc_part, "unable to get reserved page "
  905. "from nasid %d, reason=%d\n", nasid,
  906. ret);
  907. if (ret == xpcLocalPartid) {
  908. break;
  909. }
  910. continue;
  911. }
  912. remote_vars_pa = remote_rp->vars_pa;
  913. partid = remote_rp->partid;
  914. part = &xpc_partitions[partid];
  915. /* pull over the cross partition variables */
  916. ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
  917. if (ret != xpcSuccess) {
  918. dev_dbg(xpc_part, "unable to get XPC variables "
  919. "from nasid %d, reason=%d\n", nasid,
  920. ret);
  921. XPC_DEACTIVATE_PARTITION(part, ret);
  922. continue;
  923. }
  924. if (part->act_state != XPC_P_INACTIVE) {
  925. dev_dbg(xpc_part, "partition %d on nasid %d is "
  926. "already activating\n", partid, nasid);
  927. break;
  928. }
  929. /*
  930. * Register the remote partition's AMOs with SAL so it
  931. * can handle and cleanup errors within that address
  932. * range should the remote partition go down. We don't
  933. * unregister this range because it is difficult to
  934. * tell when outstanding writes to the remote partition
  935. * are finished and thus when it is thus safe to
  936. * unregister. This should not result in wasted space
  937. * in the SAL xp_addr_region table because we should
  938. * get the same page for remote_act_amos_pa after
  939. * module reloads and system reboots.
  940. */
  941. if (sn_register_xp_addr_region(
  942. remote_vars->amos_page_pa,
  943. PAGE_SIZE, 1) < 0) {
  944. dev_dbg(xpc_part, "partition %d failed to "
  945. "register xp_addr region 0x%016lx\n",
  946. partid, remote_vars->amos_page_pa);
  947. XPC_SET_REASON(part, xpcPhysAddrRegFailed,
  948. __LINE__);
  949. break;
  950. }
  951. /*
  952. * The remote nasid is valid and available.
  953. * Send an interrupt to that nasid to notify
  954. * it that we are ready to begin activation.
  955. */
  956. dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
  957. "nasid %d, phys_cpuid 0x%x\n",
  958. remote_vars->amos_page_pa,
  959. remote_vars->act_nasid,
  960. remote_vars->act_phys_cpuid);
  961. if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
  962. version)) {
  963. part->remote_amos_page_pa =
  964. remote_vars->amos_page_pa;
  965. xpc_mark_partition_disengaged(part);
  966. xpc_cancel_partition_disengage_request(part);
  967. }
  968. xpc_IPI_send_activate(remote_vars);
  969. }
  970. }
  971. kfree(discovered_nasids);
  972. kfree(remote_rp_base);
  973. }
  974. /*
  975. * Given a partid, get the nasids owned by that partition from the
  976. * remote partition's reserved page.
  977. */
  978. enum xpc_retval
  979. xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
  980. {
  981. struct xpc_partition *part;
  982. u64 part_nasid_pa;
  983. int bte_res;
  984. part = &xpc_partitions[partid];
  985. if (part->remote_rp_pa == 0) {
  986. return xpcPartitionDown;
  987. }
  988. memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
  989. part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
  990. bte_res = xp_bte_copy(part_nasid_pa, (u64) nasid_mask,
  991. xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  992. return xpc_map_bte_errors(bte_res);
  993. }