xpc_partition.c 31 KB


  1. /*
  2. * This file is subject to the terms and conditions of the GNU General Public
  3. * License. See the file "COPYING" in the main directory of this archive
  4. * for more details.
  5. *
  6. * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
  7. */
  8. /*
  9. * Cross Partition Communication (XPC) partition support.
  10. *
  11. * This is the part of XPC that detects the presence/absence of
  12. * other partitions. It provides a heartbeat and monitors the
  13. * heartbeats of other partitions.
  14. *
  15. */
  16. #include <linux/kernel.h>
  17. #include <linux/sysctl.h>
  18. #include <linux/cache.h>
  19. #include <linux/mmzone.h>
  20. #include <linux/nodemask.h>
  21. #include <asm/uncached.h>
  22. #include <asm/sn/bte.h>
  23. #include <asm/sn/intr.h>
  24. #include <asm/sn/sn_sal.h>
  25. #include <asm/sn/nodepda.h>
  26. #include <asm/sn/addrs.h>
  27. #include "xpc.h"
/* XPC is exiting flag; the scanning loops in this file poll it to bail out */
int xpc_exiting;

/*
 * SH_IPI_ACCESS shub register value(s) on startup. Saved by
 * xpc_allow_IPI_ops() and written back by xpc_restrict_IPI_ops().
 * The single sh1 value is used on Shub 1, the four sh2 values on Shub 2.
 */
static u64 xpc_sh1_IPI_access;
static u64 xpc_sh2_IPI_access0;
static u64 xpc_sh2_IPI_access1;
static u64 xpc_sh2_IPI_access2;
static u64 xpc_sh2_IPI_access3;

/*
 * original protection values for each node (only saved/restored on
 * Shub 1.1 systems, see xpc_allow_IPI_ops()/xpc_restrict_IPI_ops())
 */
u64 xpc_prot_vec[MAX_NUMNODES];

/*
 * this partition's reserved page pointers; the nasid mask, vars and
 * vars_part pointers are set up by xpc_rsvd_page_init()
 */
struct xpc_rsvd_page *xpc_rsvd_page;
static u64 *xpc_part_nasids;
static u64 *xpc_mach_nasids;
struct xpc_vars *xpc_vars;
struct xpc_vars_part *xpc_vars_part;

static int xp_nasid_mask_bytes;	/* actual size in bytes of nasid mask */
static int xp_nasid_mask_words;	/* actual size in (8-byte) words of nasid mask */

/*
 * For performance reasons, each entry of xpc_partitions[] is cacheline
 * aligned. And xpc_partitions[] is padded with an additional entry at the
 * end so that the last legitimate entry doesn't share its cacheline with
 * another variable.
 */
struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];

/*
 * Generic buffer used to store a local copy of portions of a remote
 * partition's reserved page (either its header and part_nasids mask,
 * or its vars). Both uses share the same buffer, so a caller must save
 * whatever it needs from one copy before pulling over the other.
 */
char *xpc_remote_copy_buffer;
/* NOTE(review): presumably the unaligned kmalloc base of the buffer above - confirm */
void *xpc_remote_copy_buffer_base;
  60. /*
  61. * Guarantee that the kmalloc'd memory is cacheline aligned.
  62. */
  63. void *
  64. xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
  65. {
  66. /* see if kmalloc will give us cachline aligned memory by default */
  67. *base = kmalloc(size, flags);
  68. if (*base == NULL)
  69. return NULL;
  70. if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
  71. return *base;
  72. kfree(*base);
  73. /* nope, we'll have to do it ourselves */
  74. *base = kmalloc(size + L1_CACHE_BYTES, flags);
  75. if (*base == NULL)
  76. return NULL;
  77. return (void *)L1_CACHE_ALIGN((u64)*base);
  78. }
  79. /*
  80. * Given a nasid, get the physical address of the partition's reserved page
  81. * for that nasid. This function returns 0 on any error.
  82. */
  83. static u64
  84. xpc_get_rsvd_page_pa(int nasid)
  85. {
  86. bte_result_t bte_res;
  87. s64 status;
  88. u64 cookie = 0;
  89. u64 rp_pa = nasid; /* seed with nasid */
  90. u64 len = 0;
  91. u64 buf = buf;
  92. u64 buf_len = 0;
  93. void *buf_base = NULL;
  94. while (1) {
  95. status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
  96. &len);
  97. dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
  98. "0x%016lx, address=0x%016lx, len=0x%016lx\n",
  99. status, cookie, rp_pa, len);
  100. if (status != SALRET_MORE_PASSES)
  101. break;
  102. if (L1_CACHE_ALIGN(len) > buf_len) {
  103. kfree(buf_base);
  104. buf_len = L1_CACHE_ALIGN(len);
  105. buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len,
  106. GFP_KERNEL,
  107. &buf_base);
  108. if (buf_base == NULL) {
  109. dev_err(xpc_part, "unable to kmalloc "
  110. "len=0x%016lx\n", buf_len);
  111. status = SALRET_ERROR;
  112. break;
  113. }
  114. }
  115. bte_res = xp_bte_copy(rp_pa, buf, buf_len,
  116. (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  117. if (bte_res != BTE_SUCCESS) {
  118. dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
  119. status = SALRET_ERROR;
  120. break;
  121. }
  122. }
  123. kfree(buf_base);
  124. if (status != SALRET_OK)
  125. rp_pa = 0;
  126. dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
  127. return rp_pa;
  128. }
  129. /*
  130. * Fill the partition reserved page with the information needed by
  131. * other partitions to discover we are alive and establish initial
  132. * communications.
  133. */
  134. struct xpc_rsvd_page *
  135. xpc_rsvd_page_init(void)
  136. {
  137. struct xpc_rsvd_page *rp;
  138. AMO_t *amos_page;
  139. u64 rp_pa, nasid_array = 0;
  140. int i, ret;
  141. /* get the local reserved page's address */
  142. preempt_disable();
  143. rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
  144. preempt_enable();
  145. if (rp_pa == 0) {
  146. dev_err(xpc_part, "SAL failed to locate the reserved page\n");
  147. return NULL;
  148. }
  149. rp = (struct xpc_rsvd_page *)__va(rp_pa);
  150. if (rp->partid != sn_partition_id) {
  151. dev_err(xpc_part, "the reserved page's partid of %d should be "
  152. "%d\n", rp->partid, sn_partition_id);
  153. return NULL;
  154. }
  155. rp->version = XPC_RP_VERSION;
  156. /* establish the actual sizes of the nasid masks */
  157. if (rp->SAL_version == 1) {
  158. /* SAL_version 1 didn't set the nasids_size field */
  159. rp->nasids_size = 128;
  160. }
  161. xp_nasid_mask_bytes = rp->nasids_size;
  162. xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
  163. /* setup the pointers to the various items in the reserved page */
  164. xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
  165. xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
  166. xpc_vars = XPC_RP_VARS(rp);
  167. xpc_vars_part = XPC_RP_VARS_PART(rp);
  168. /*
  169. * Before clearing xpc_vars, see if a page of AMOs had been previously
  170. * allocated. If not we'll need to allocate one and set permissions
  171. * so that cross-partition AMOs are allowed.
  172. *
  173. * The allocated AMO page needs MCA reporting to remain disabled after
  174. * XPC has unloaded. To make this work, we keep a copy of the pointer
  175. * to this page (i.e., amos_page) in the struct xpc_vars structure,
  176. * which is pointed to by the reserved page, and re-use that saved copy
  177. * on subsequent loads of XPC. This AMO page is never freed, and its
  178. * memory protections are never restricted.
  179. */
  180. amos_page = xpc_vars->amos_page;
  181. if (amos_page == NULL) {
  182. amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0, 1));
  183. if (amos_page == NULL) {
  184. dev_err(xpc_part, "can't allocate page of AMOs\n");
  185. return NULL;
  186. }
  187. /*
  188. * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems
  189. * when xpc_allow_IPI_ops() is called via xpc_hb_init().
  190. */
  191. if (!enable_shub_wars_1_1()) {
  192. ret = sn_change_memprotect(ia64_tpa((u64)amos_page),
  193. PAGE_SIZE,
  194. SN_MEMPROT_ACCESS_CLASS_1,
  195. &nasid_array);
  196. if (ret != 0) {
  197. dev_err(xpc_part, "can't change memory "
  198. "protections\n");
  199. uncached_free_page(__IA64_UNCACHED_OFFSET |
  200. TO_PHYS((u64)amos_page), 1);
  201. return NULL;
  202. }
  203. }
  204. } else if (!IS_AMO_ADDRESS((u64)amos_page)) {
  205. /*
  206. * EFI's XPBOOT can also set amos_page in the reserved page,
  207. * but it happens to leave it as an uncached physical address
  208. * and we need it to be an uncached virtual, so we'll have to
  209. * convert it.
  210. */
  211. if (!IS_AMO_PHYS_ADDRESS((u64)amos_page)) {
  212. dev_err(xpc_part, "previously used amos_page address "
  213. "is bad = 0x%p\n", (void *)amos_page);
  214. return NULL;
  215. }
  216. amos_page = (AMO_t *)TO_AMO((u64)amos_page);
  217. }
  218. /* clear xpc_vars */
  219. memset(xpc_vars, 0, sizeof(struct xpc_vars));
  220. xpc_vars->version = XPC_V_VERSION;
  221. xpc_vars->act_nasid = cpuid_to_nasid(0);
  222. xpc_vars->act_phys_cpuid = cpu_physical_id(0);
  223. xpc_vars->vars_part_pa = __pa(xpc_vars_part);
  224. xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page);
  225. xpc_vars->amos_page = amos_page; /* save for next load of XPC */
  226. /* clear xpc_vars_part */
  227. memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
  228. XP_MAX_PARTITIONS);
  229. /* initialize the activate IRQ related AMO variables */
  230. for (i = 0; i < xp_nasid_mask_words; i++)
  231. (void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
  232. /* initialize the engaged remote partitions related AMO variables */
  233. (void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
  234. (void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
  235. /* timestamp of when reserved page was setup by XPC */
  236. rp->stamp = CURRENT_TIME;
  237. /*
  238. * This signifies to the remote partition that our reserved
  239. * page is initialized.
  240. */
  241. rp->vars_pa = __pa(xpc_vars);
  242. return rp;
  243. }
  244. /*
  245. * Change protections to allow IPI operations (and AMO operations on
  246. * Shub 1.1 systems).
  247. */
  248. void
  249. xpc_allow_IPI_ops(void)
  250. {
  251. int node;
  252. int nasid;
  253. /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
  254. if (is_shub2()) {
  255. xpc_sh2_IPI_access0 =
  256. (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
  257. xpc_sh2_IPI_access1 =
  258. (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
  259. xpc_sh2_IPI_access2 =
  260. (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
  261. xpc_sh2_IPI_access3 =
  262. (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
  263. for_each_online_node(node) {
  264. nasid = cnodeid_to_nasid(node);
  265. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
  266. -1UL);
  267. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
  268. -1UL);
  269. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
  270. -1UL);
  271. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
  272. -1UL);
  273. }
  274. } else {
  275. xpc_sh1_IPI_access =
  276. (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
  277. for_each_online_node(node) {
  278. nasid = cnodeid_to_nasid(node);
  279. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
  280. -1UL);
  281. /*
  282. * Since the BIST collides with memory operations on
  283. * SHUB 1.1 sn_change_memprotect() cannot be used.
  284. */
  285. if (enable_shub_wars_1_1()) {
  286. /* open up everything */
  287. xpc_prot_vec[node] = (u64)HUB_L((u64 *)
  288. GLOBAL_MMR_ADDR
  289. (nasid,
  290. SH1_MD_DQLP_MMR_DIR_PRIVEC0));
  291. HUB_S((u64 *)
  292. GLOBAL_MMR_ADDR(nasid,
  293. SH1_MD_DQLP_MMR_DIR_PRIVEC0),
  294. -1UL);
  295. HUB_S((u64 *)
  296. GLOBAL_MMR_ADDR(nasid,
  297. SH1_MD_DQRP_MMR_DIR_PRIVEC0),
  298. -1UL);
  299. }
  300. }
  301. }
  302. }
  303. /*
  304. * Restrict protections to disallow IPI operations (and AMO operations on
  305. * Shub 1.1 systems).
  306. */
  307. void
  308. xpc_restrict_IPI_ops(void)
  309. {
  310. int node;
  311. int nasid;
  312. /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
  313. if (is_shub2()) {
  314. for_each_online_node(node) {
  315. nasid = cnodeid_to_nasid(node);
  316. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
  317. xpc_sh2_IPI_access0);
  318. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
  319. xpc_sh2_IPI_access1);
  320. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
  321. xpc_sh2_IPI_access2);
  322. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
  323. xpc_sh2_IPI_access3);
  324. }
  325. } else {
  326. for_each_online_node(node) {
  327. nasid = cnodeid_to_nasid(node);
  328. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
  329. xpc_sh1_IPI_access);
  330. if (enable_shub_wars_1_1()) {
  331. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
  332. SH1_MD_DQLP_MMR_DIR_PRIVEC0),
  333. xpc_prot_vec[node]);
  334. HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
  335. SH1_MD_DQRP_MMR_DIR_PRIVEC0),
  336. xpc_prot_vec[node]);
  337. }
  338. }
  339. }
  340. }
  341. /*
  342. * At periodic intervals, scan through all active partitions and ensure
  343. * their heartbeat is still active. If not, the partition is deactivated.
  344. */
  345. void
  346. xpc_check_remote_hb(void)
  347. {
  348. struct xpc_vars *remote_vars;
  349. struct xpc_partition *part;
  350. partid_t partid;
  351. bte_result_t bres;
  352. remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
  353. for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
  354. if (xpc_exiting)
  355. break;
  356. if (partid == sn_partition_id)
  357. continue;
  358. part = &xpc_partitions[partid];
  359. if (part->act_state == XPC_P_INACTIVE ||
  360. part->act_state == XPC_P_DEACTIVATING) {
  361. continue;
  362. }
  363. /* pull the remote_hb cache line */
  364. bres = xp_bte_copy(part->remote_vars_pa,
  365. (u64)remote_vars,
  366. XPC_RP_VARS_SIZE,
  367. (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  368. if (bres != BTE_SUCCESS) {
  369. XPC_DEACTIVATE_PARTITION(part,
  370. xpc_map_bte_errors(bres));
  371. continue;
  372. }
  373. dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
  374. " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
  375. partid, remote_vars->heartbeat, part->last_heartbeat,
  376. remote_vars->heartbeat_offline,
  377. remote_vars->heartbeating_to_mask);
  378. if (((remote_vars->heartbeat == part->last_heartbeat) &&
  379. (remote_vars->heartbeat_offline == 0)) ||
  380. !xpc_hb_allowed(sn_partition_id, remote_vars)) {
  381. XPC_DEACTIVATE_PARTITION(part, xpNoHeartbeat);
  382. continue;
  383. }
  384. part->last_heartbeat = remote_vars->heartbeat;
  385. }
  386. }
  387. /*
  388. * Get a copy of a portion of the remote partition's rsvd page.
  389. *
  390. * remote_rp points to a buffer that is cacheline aligned for BTE copies and
  391. * is large enough to contain a copy of their reserved page header and
  392. * part_nasids mask.
  393. */
  394. static enum xp_retval
  395. xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
  396. struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
  397. {
  398. int bres, i;
  399. /* get the reserved page's physical address */
  400. *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
  401. if (*remote_rp_pa == 0)
  402. return xpNoRsvdPageAddr;
  403. /* pull over the reserved page header and part_nasids mask */
  404. bres = xp_bte_copy(*remote_rp_pa, (u64)remote_rp,
  405. XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
  406. (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  407. if (bres != BTE_SUCCESS)
  408. return xpc_map_bte_errors(bres);
  409. if (discovered_nasids != NULL) {
  410. u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
  411. for (i = 0; i < xp_nasid_mask_words; i++)
  412. discovered_nasids[i] |= remote_part_nasids[i];
  413. }
  414. /* check that the partid is for another partition */
  415. if (remote_rp->partid < 1 ||
  416. remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
  417. return xpInvalidPartid;
  418. }
  419. if (remote_rp->partid == sn_partition_id)
  420. return xpLocalPartid;
  421. if (XPC_VERSION_MAJOR(remote_rp->version) !=
  422. XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
  423. return xpBadVersion;
  424. }
  425. return xpSuccess;
  426. }
  427. /*
  428. * Get a copy of the remote partition's XPC variables from the reserved page.
  429. *
  430. * remote_vars points to a buffer that is cacheline aligned for BTE copies and
  431. * assumed to be of size XPC_RP_VARS_SIZE.
  432. */
  433. static enum xp_retval
  434. xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
  435. {
  436. int bres;
  437. if (remote_vars_pa == 0)
  438. return xpVarsNotSet;
  439. /* pull over the cross partition variables */
  440. bres = xp_bte_copy(remote_vars_pa, (u64)remote_vars, XPC_RP_VARS_SIZE,
  441. (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  442. if (bres != BTE_SUCCESS)
  443. return xpc_map_bte_errors(bres);
  444. if (XPC_VERSION_MAJOR(remote_vars->version) !=
  445. XPC_VERSION_MAJOR(XPC_V_VERSION)) {
  446. return xpBadVersion;
  447. }
  448. return xpSuccess;
  449. }
  450. /*
  451. * Update the remote partition's info.
  452. */
  453. static void
  454. xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
  455. struct timespec *remote_rp_stamp, u64 remote_rp_pa,
  456. u64 remote_vars_pa, struct xpc_vars *remote_vars)
  457. {
  458. part->remote_rp_version = remote_rp_version;
  459. dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n",
  460. part->remote_rp_version);
  461. part->remote_rp_stamp = *remote_rp_stamp;
  462. dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
  463. part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
  464. part->remote_rp_pa = remote_rp_pa;
  465. dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
  466. part->remote_vars_pa = remote_vars_pa;
  467. dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
  468. part->remote_vars_pa);
  469. part->last_heartbeat = remote_vars->heartbeat;
  470. dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
  471. part->last_heartbeat);
  472. part->remote_vars_part_pa = remote_vars->vars_part_pa;
  473. dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
  474. part->remote_vars_part_pa);
  475. part->remote_act_nasid = remote_vars->act_nasid;
  476. dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
  477. part->remote_act_nasid);
  478. part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
  479. dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
  480. part->remote_act_phys_cpuid);
  481. part->remote_amos_page_pa = remote_vars->amos_page_pa;
  482. dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
  483. part->remote_amos_page_pa);
  484. part->remote_vars_version = remote_vars->version;
  485. dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
  486. part->remote_vars_version);
  487. }
/*
 * Prior code has determined the nasid which generated an IPI. Inspect
 * that nasid to determine if its partition needs to be activated or
 * deactivated.
 *
 * A partition is considered "awaiting activation" if our partition
 * flags indicate it is not active and it has a heartbeat. A
 * partition is considered "awaiting deactivation" if our partition
 * flags indicate it is active but it has no heartbeat or it is not
 * sending its heartbeat to us.
 *
 * To determine the heartbeat, the remote nasid must have a properly
 * initialized reserved page.
 *
 * Outline of what the code below does:
 *   1. pull over the remote reserved page header, then the remote vars;
 *      bail out (deactivating the partition in the latter case) on failure
 *   2. if we consider the partition disengaged and inactive, record its
 *      info and activate it (unless it still has a disengage request
 *      pending against us)
 *   3. otherwise compare the version capabilities and reserved page stamp
 *      we previously recorded against what was just read to decide whether
 *      the other side rebooted, and if so deactivate with xpReactivating
 *   4. failing that, deactivate if the other side requested a disengage
 */
static void
xpc_identify_act_IRQ_req(int nasid)
{
	struct xpc_rsvd_page *remote_rp;
	struct xpc_vars *remote_vars;
	u64 remote_rp_pa;
	u64 remote_vars_pa;
	int remote_rp_version;
	int reactivate = 0;
	int stamp_diff;
	struct timespec remote_rp_stamp = { 0, 0 };
	partid_t partid;
	struct xpc_partition *part;
	enum xp_retval ret;

	/* pull over the reserved page structure */

	remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;

	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
	if (ret != xpSuccess) {
		dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
			 "which sent interrupt, reason=%d\n", nasid, ret);
		return;
	}

	/*
	 * remote_rp and remote_vars both point at xpc_remote_copy_buffer, so
	 * everything needed from the reserved page header has to be saved in
	 * locals before the vars are pulled over on top of it.
	 */
	remote_vars_pa = remote_rp->vars_pa;
	remote_rp_version = remote_rp->version;
	/* remote_rp_stamp stays { 0, 0 } when the remote has no stamp support */
	if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
		remote_rp_stamp = remote_rp->stamp;

	partid = remote_rp->partid;
	part = &xpc_partitions[partid];

	/* pull over the cross partition variables */

	remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;

	ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
	if (ret != xpSuccess) {

		dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
			 "which sent interrupt, reason=%d\n", nasid, ret);

		XPC_DEACTIVATE_PARTITION(part, ret);
		return;
	}

	part->act_IRQ_rcvd++;

	dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
		"%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd,
		remote_vars->heartbeat, remote_vars->heartbeating_to_mask);

	/* case: the partition is not active on our side, so activate it */
	if (xpc_partition_disengaged(part) &&
	    part->act_state == XPC_P_INACTIVE) {

		xpc_update_partition_info(part, remote_rp_version,
					  &remote_rp_stamp, remote_rp_pa,
					  remote_vars_pa, remote_vars);

		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
			if (xpc_partition_disengage_requested(1UL << partid)) {
				/*
				 * Other side is waiting on us to disengage,
				 * even though we already have.
				 */
				return;
			}
		} else {
			/* other side doesn't support disengage requests */
			xpc_clear_partition_disengage_request(1UL << partid);
		}

		xpc_activate_partition(part);
		return;
	}

	/*
	 * From here on 'part' is expected to hold the info recorded when the
	 * partition was previously seen.
	 */
	DBUG_ON(part->remote_rp_version == 0);
	DBUG_ON(part->remote_vars_version == 0);

	/* case: the previously recorded remote XPC had no stamp support */
	if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
		DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
						       remote_vars_version));

		if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
			DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
							       version));
			/* see if the other side rebooted */
			if (part->remote_amos_page_pa ==
			    remote_vars->amos_page_pa &&
			    xpc_hb_allowed(sn_partition_id, remote_vars)) {
				/* doesn't look that way, so ignore the IPI */
				return;
			}
		}

		/*
		 * Other side rebooted and previous XPC didn't support the
		 * disengage request, so we don't need to do anything special.
		 */

		xpc_update_partition_info(part, remote_rp_version,
					  &remote_rp_stamp, remote_rp_pa,
					  remote_vars_pa, remote_vars);
		part->reactivate_nasid = nasid;
		XPC_DEACTIVATE_PARTITION(part, xpReactivating);
		return;
	}

	DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));

	/* case: previous remote XPC had stamp support; see what the new one has */
	if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));

		/*
		 * Other side rebooted and previous XPC did support the
		 * disengage request, but the new one doesn't.
		 */

		xpc_clear_partition_engaged(1UL << partid);
		xpc_clear_partition_disengage_request(1UL << partid);

		xpc_update_partition_info(part, remote_rp_version,
					  &remote_rp_stamp, remote_rp_pa,
					  remote_vars_pa, remote_vars);
		reactivate = 1;

	} else {
		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));

		/* a changed reserved page stamp is taken to mean a reboot */
		stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
						&remote_rp_stamp);
		if (stamp_diff != 0) {
			/*
			 * NOTE(review): presumably a negative result means the
			 * recorded stamp is older than the new one - confirm
			 * against xpc_compare_stamps().
			 */
			DBUG_ON(stamp_diff >= 0);

			/*
			 * Other side rebooted and the previous XPC did support
			 * the disengage request, as does the new one.
			 */

			DBUG_ON(xpc_partition_engaged(1UL << partid));
			DBUG_ON(xpc_partition_disengage_requested(1UL <<
								  partid));

			xpc_update_partition_info(part, remote_rp_version,
						  &remote_rp_stamp,
						  remote_rp_pa, remote_vars_pa,
						  remote_vars);
			reactivate = 1;
		}
	}

	if (part->disengage_request_timeout > 0 &&
	    !xpc_partition_disengaged(part)) {
		/* still waiting on other side to disengage from us */
		return;
	}

	if (reactivate) {
		/* remember which nasid to use when reactivating */
		part->reactivate_nasid = nasid;
		XPC_DEACTIVATE_PARTITION(part, xpReactivating);

	} else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
		   xpc_partition_disengage_requested(1UL << partid)) {
		XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
	}
}
  636. /*
  637. * Loop through the activation AMO variables and process any bits
  638. * which are set. Each bit indicates a nasid sending a partition
  639. * activation or deactivation request.
  640. *
  641. * Return #of IRQs detected.
  642. */
  643. int
  644. xpc_identify_act_IRQ_sender(void)
  645. {
  646. int word, bit;
  647. u64 nasid_mask;
  648. u64 nasid; /* remote nasid */
  649. int n_IRQs_detected = 0;
  650. AMO_t *act_amos;
  651. act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
  652. /* scan through act AMO variable looking for non-zero entries */
  653. for (word = 0; word < xp_nasid_mask_words; word++) {
  654. if (xpc_exiting)
  655. break;
  656. nasid_mask = xpc_IPI_receive(&act_amos[word]);
  657. if (nasid_mask == 0) {
  658. /* no IRQs from nasids in this variable */
  659. continue;
  660. }
  661. dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
  662. nasid_mask);
  663. /*
  664. * If this nasid has been added to the machine since
  665. * our partition was reset, this will retain the
  666. * remote nasid in our reserved pages machine mask.
  667. * This is used in the event of module reload.
  668. */
  669. xpc_mach_nasids[word] |= nasid_mask;
  670. /* locate the nasid(s) which sent interrupts */
  671. for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
  672. if (nasid_mask & (1UL << bit)) {
  673. n_IRQs_detected++;
  674. nasid = XPC_NASID_FROM_W_B(word, bit);
  675. dev_dbg(xpc_part, "interrupt from nasid %ld\n",
  676. nasid);
  677. xpc_identify_act_IRQ_req(nasid);
  678. }
  679. }
  680. }
  681. return n_IRQs_detected;
  682. }
  683. /*
  684. * See if the other side has responded to a partition disengage request
  685. * from us.
  686. */
  687. int
  688. xpc_partition_disengaged(struct xpc_partition *part)
  689. {
  690. partid_t partid = XPC_PARTID(part);
  691. int disengaged;
  692. disengaged = (xpc_partition_engaged(1UL << partid) == 0);
  693. if (part->disengage_request_timeout) {
  694. if (!disengaged) {
  695. if (time_before(jiffies,
  696. part->disengage_request_timeout)) {
  697. /* timelimit hasn't been reached yet */
  698. return 0;
  699. }
  700. /*
  701. * Other side hasn't responded to our disengage
  702. * request in a timely fashion, so assume it's dead.
  703. */
  704. dev_info(xpc_part, "disengage from remote partition %d "
  705. "timed out\n", partid);
  706. xpc_disengage_request_timedout = 1;
  707. xpc_clear_partition_engaged(1UL << partid);
  708. disengaged = 1;
  709. }
  710. part->disengage_request_timeout = 0;
  711. /* cancel the timer function, provided it's not us */
  712. if (!in_interrupt()) {
  713. del_singleshot_timer_sync(&part->
  714. disengage_request_timer);
  715. }
  716. DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
  717. part->act_state != XPC_P_INACTIVE);
  718. if (part->act_state != XPC_P_INACTIVE)
  719. xpc_wakeup_channel_mgr(part);
  720. if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version))
  721. xpc_cancel_partition_disengage_request(part);
  722. }
  723. return disengaged;
  724. }
  725. /*
  726. * Mark specified partition as active.
  727. */
  728. enum xp_retval
  729. xpc_mark_partition_active(struct xpc_partition *part)
  730. {
  731. unsigned long irq_flags;
  732. enum xp_retval ret;
  733. dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
  734. spin_lock_irqsave(&part->act_lock, irq_flags);
  735. if (part->act_state == XPC_P_ACTIVATING) {
  736. part->act_state = XPC_P_ACTIVE;
  737. ret = xpSuccess;
  738. } else {
  739. DBUG_ON(part->reason == xpSuccess);
  740. ret = part->reason;
  741. }
  742. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  743. return ret;
  744. }
  745. /*
  746. * Notify XPC that the partition is down.
  747. */
  748. void
  749. xpc_deactivate_partition(const int line, struct xpc_partition *part,
  750. enum xp_retval reason)
  751. {
  752. unsigned long irq_flags;
  753. spin_lock_irqsave(&part->act_lock, irq_flags);
  754. if (part->act_state == XPC_P_INACTIVE) {
  755. XPC_SET_REASON(part, reason, line);
  756. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  757. if (reason == xpReactivating) {
  758. /* we interrupt ourselves to reactivate partition */
  759. xpc_IPI_send_reactivate(part);
  760. }
  761. return;
  762. }
  763. if (part->act_state == XPC_P_DEACTIVATING) {
  764. if ((part->reason == xpUnloading && reason != xpUnloading) ||
  765. reason == xpReactivating) {
  766. XPC_SET_REASON(part, reason, line);
  767. }
  768. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  769. return;
  770. }
  771. part->act_state = XPC_P_DEACTIVATING;
  772. XPC_SET_REASON(part, reason, line);
  773. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  774. if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
  775. xpc_request_partition_disengage(part);
  776. xpc_IPI_send_disengage(part);
  777. /* set a timelimit on the disengage request */
  778. part->disengage_request_timeout = jiffies +
  779. (xpc_disengage_request_timelimit * HZ);
  780. part->disengage_request_timer.expires =
  781. part->disengage_request_timeout;
  782. add_timer(&part->disengage_request_timer);
  783. }
  784. dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
  785. XPC_PARTID(part), reason);
  786. xpc_partition_going_down(part, reason);
  787. }
  788. /*
  789. * Mark specified partition as inactive.
  790. */
  791. void
  792. xpc_mark_partition_inactive(struct xpc_partition *part)
  793. {
  794. unsigned long irq_flags;
  795. dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
  796. XPC_PARTID(part));
  797. spin_lock_irqsave(&part->act_lock, irq_flags);
  798. part->act_state = XPC_P_INACTIVE;
  799. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  800. part->remote_rp_pa = 0;
  801. }
  802. /*
  803. * SAL has provided a partition and machine mask. The partition mask
  804. * contains a bit for each even nasid in our partition. The machine
  805. * mask contains a bit for each even nasid in the entire machine.
  806. *
  807. * Using those two bit arrays, we can determine which nasids are
  808. * known in the machine. Each should also have a reserved page
  809. * initialized if they are available for partitioning.
  810. */
  811. void
  812. xpc_discovery(void)
  813. {
  814. void *remote_rp_base;
  815. struct xpc_rsvd_page *remote_rp;
  816. struct xpc_vars *remote_vars;
  817. u64 remote_rp_pa;
  818. u64 remote_vars_pa;
  819. int region;
  820. int region_size;
  821. int max_regions;
  822. int nasid;
  823. struct xpc_rsvd_page *rp;
  824. partid_t partid;
  825. struct xpc_partition *part;
  826. u64 *discovered_nasids;
  827. enum xp_retval ret;
  828. remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
  829. xp_nasid_mask_bytes,
  830. GFP_KERNEL, &remote_rp_base);
  831. if (remote_rp == NULL)
  832. return;
  833. remote_vars = (struct xpc_vars *)remote_rp;
  834. discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
  835. GFP_KERNEL);
  836. if (discovered_nasids == NULL) {
  837. kfree(remote_rp_base);
  838. return;
  839. }
  840. rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
  841. /*
  842. * The term 'region' in this context refers to the minimum number of
  843. * nodes that can comprise an access protection grouping. The access
  844. * protection is in regards to memory, IOI and IPI.
  845. */
  846. max_regions = 64;
  847. region_size = sn_region_size;
  848. switch (region_size) {
  849. case 128:
  850. max_regions *= 2;
  851. case 64:
  852. max_regions *= 2;
  853. case 32:
  854. max_regions *= 2;
  855. region_size = 16;
  856. DBUG_ON(!is_shub2());
  857. }
  858. for (region = 0; region < max_regions; region++) {
  859. if (xpc_exiting)
  860. break;
  861. dev_dbg(xpc_part, "searching region %d\n", region);
  862. for (nasid = (region * region_size * 2);
  863. nasid < ((region + 1) * region_size * 2); nasid += 2) {
  864. if (xpc_exiting)
  865. break;
  866. dev_dbg(xpc_part, "checking nasid %d\n", nasid);
  867. if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
  868. dev_dbg(xpc_part, "PROM indicates Nasid %d is "
  869. "part of the local partition; skipping "
  870. "region\n", nasid);
  871. break;
  872. }
  873. if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
  874. dev_dbg(xpc_part, "PROM indicates Nasid %d was "
  875. "not on Numa-Link network at reset\n",
  876. nasid);
  877. continue;
  878. }
  879. if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
  880. dev_dbg(xpc_part, "Nasid %d is part of a "
  881. "partition which was previously "
  882. "discovered\n", nasid);
  883. continue;
  884. }
  885. /* pull over the reserved page structure */
  886. ret = xpc_get_remote_rp(nasid, discovered_nasids,
  887. remote_rp, &remote_rp_pa);
  888. if (ret != xpSuccess) {
  889. dev_dbg(xpc_part, "unable to get reserved page "
  890. "from nasid %d, reason=%d\n", nasid,
  891. ret);
  892. if (ret == xpLocalPartid)
  893. break;
  894. continue;
  895. }
  896. remote_vars_pa = remote_rp->vars_pa;
  897. partid = remote_rp->partid;
  898. part = &xpc_partitions[partid];
  899. /* pull over the cross partition variables */
  900. ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
  901. if (ret != xpSuccess) {
  902. dev_dbg(xpc_part, "unable to get XPC variables "
  903. "from nasid %d, reason=%d\n", nasid,
  904. ret);
  905. XPC_DEACTIVATE_PARTITION(part, ret);
  906. continue;
  907. }
  908. if (part->act_state != XPC_P_INACTIVE) {
  909. dev_dbg(xpc_part, "partition %d on nasid %d is "
  910. "already activating\n", partid, nasid);
  911. break;
  912. }
  913. /*
  914. * Register the remote partition's AMOs with SAL so it
  915. * can handle and cleanup errors within that address
  916. * range should the remote partition go down. We don't
  917. * unregister this range because it is difficult to
  918. * tell when outstanding writes to the remote partition
  919. * are finished and thus when it is thus safe to
  920. * unregister. This should not result in wasted space
  921. * in the SAL xp_addr_region table because we should
  922. * get the same page for remote_act_amos_pa after
  923. * module reloads and system reboots.
  924. */
  925. if (sn_register_xp_addr_region
  926. (remote_vars->amos_page_pa, PAGE_SIZE, 1) < 0) {
  927. dev_dbg(xpc_part,
  928. "partition %d failed to "
  929. "register xp_addr region 0x%016lx\n",
  930. partid, remote_vars->amos_page_pa);
  931. XPC_SET_REASON(part, xpPhysAddrRegFailed,
  932. __LINE__);
  933. break;
  934. }
  935. /*
  936. * The remote nasid is valid and available.
  937. * Send an interrupt to that nasid to notify
  938. * it that we are ready to begin activation.
  939. */
  940. dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
  941. "nasid %d, phys_cpuid 0x%x\n",
  942. remote_vars->amos_page_pa,
  943. remote_vars->act_nasid,
  944. remote_vars->act_phys_cpuid);
  945. if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
  946. version)) {
  947. part->remote_amos_page_pa =
  948. remote_vars->amos_page_pa;
  949. xpc_mark_partition_disengaged(part);
  950. xpc_cancel_partition_disengage_request(part);
  951. }
  952. xpc_IPI_send_activate(remote_vars);
  953. }
  954. }
  955. kfree(discovered_nasids);
  956. kfree(remote_rp_base);
  957. }
  958. /*
  959. * Given a partid, get the nasids owned by that partition from the
  960. * remote partition's reserved page.
  961. */
  962. enum xp_retval
  963. xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
  964. {
  965. struct xpc_partition *part;
  966. u64 part_nasid_pa;
  967. int bte_res;
  968. part = &xpc_partitions[partid];
  969. if (part->remote_rp_pa == 0)
  970. return xpPartitionDown;
  971. memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
  972. part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
  973. bte_res = xp_bte_copy(part_nasid_pa, (u64)nasid_mask,
  974. xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE),
  975. NULL);
  976. return xpc_map_bte_errors(bte_res);
  977. }