xpc_partition.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251
  1. /*
  2. * This file is subject to the terms and conditions of the GNU General Public
  3. * License. See the file "COPYING" in the main directory of this archive
  4. * for more details.
  5. *
  6. * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
  7. */
  8. /*
  9. * Cross Partition Communication (XPC) partition support.
  10. *
  11. * This is the part of XPC that detects the presence/absence of
  12. * other partitions. It provides a heartbeat and monitors the
  13. * heartbeats of other partitions.
  14. *
  15. */
  16. #include <linux/kernel.h>
  17. #include <linux/sysctl.h>
  18. #include <linux/cache.h>
  19. #include <linux/mmzone.h>
  20. #include <linux/nodemask.h>
  21. #include <asm/uncached.h>
  22. #include <asm/sn/bte.h>
  23. #include <asm/sn/intr.h>
  24. #include <asm/sn/sn_sal.h>
  25. #include <asm/sn/nodepda.h>
  26. #include <asm/sn/addrs.h>
  27. #include <asm/sn/xpc.h>
/* XPC is exiting flag; set non-zero when XPC unloads, polled by scan loops */
int xpc_exiting;

/* SH_IPI_ACCESS shub register value on startup (saved so it can be restored
 * by xpc_restrict_IPI_ops()); SHUB2 has four access registers, SHUB1 has one */
static u64 xpc_sh1_IPI_access;
static u64 xpc_sh2_IPI_access0;
static u64 xpc_sh2_IPI_access1;
static u64 xpc_sh2_IPI_access2;
static u64 xpc_sh2_IPI_access3;

/* original protection values for each node, saved before being opened up
 * for the Shub 1.1 workaround and restored on unload */
u64 xpc_prot_vec[MAX_NUMNODES];

/* this partition's reserved page pointers (set up by xpc_rsvd_page_init()) */
struct xpc_rsvd_page *xpc_rsvd_page;
static u64 *xpc_part_nasids;
static u64 *xpc_mach_nasids;
struct xpc_vars *xpc_vars;
struct xpc_vars_part *xpc_vars_part;

static int xp_nasid_mask_bytes;	/* actual size in bytes of nasid mask */
static int xp_nasid_mask_words;	/* actual size in words of nasid mask */

/*
 * For performance reasons, each entry of xpc_partitions[] is cacheline
 * aligned. And xpc_partitions[] is padded with an additional entry at the
 * end so that the last legitimate entry doesn't share its cacheline with
 * another variable.
 */
struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];

/*
 * Generic buffer used to store a local copy of portions of a remote
 * partition's reserved page (either its header and part_nasids mask,
 * or its vars).
 *
 * xpc_discovery runs only once and is a seperate thread that is
 * very likely going to be processing in parallel with receiving
 * interrupts.
 */
char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
							XP_NASID_MASK_BYTES];
  64. /*
  65. * Guarantee that the kmalloc'd memory is cacheline aligned.
  66. */
  67. static void *
  68. xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
  69. {
  70. /* see if kmalloc will give us cachline aligned memory by default */
  71. *base = kmalloc(size, flags);
  72. if (*base == NULL) {
  73. return NULL;
  74. }
  75. if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
  76. return *base;
  77. }
  78. kfree(*base);
  79. /* nope, we'll have to do it ourselves */
  80. *base = kmalloc(size + L1_CACHE_BYTES, flags);
  81. if (*base == NULL) {
  82. return NULL;
  83. }
  84. return (void *) L1_CACHE_ALIGN((u64) *base);
  85. }
  86. /*
  87. * Given a nasid, get the physical address of the partition's reserved page
  88. * for that nasid. This function returns 0 on any error.
  89. */
  90. static u64
  91. xpc_get_rsvd_page_pa(int nasid)
  92. {
  93. bte_result_t bte_res;
  94. s64 status;
  95. u64 cookie = 0;
  96. u64 rp_pa = nasid; /* seed with nasid */
  97. u64 len = 0;
  98. u64 buf = buf;
  99. u64 buf_len = 0;
  100. void *buf_base = NULL;
  101. while (1) {
  102. status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
  103. &len);
  104. dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
  105. "0x%016lx, address=0x%016lx, len=0x%016lx\n",
  106. status, cookie, rp_pa, len);
  107. if (status != SALRET_MORE_PASSES) {
  108. break;
  109. }
  110. if (L1_CACHE_ALIGN(len) > buf_len) {
  111. if (buf_base != NULL) {
  112. kfree(buf_base);
  113. }
  114. buf_len = L1_CACHE_ALIGN(len);
  115. buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len,
  116. GFP_KERNEL, &buf_base);
  117. if (buf_base == NULL) {
  118. dev_err(xpc_part, "unable to kmalloc "
  119. "len=0x%016lx\n", buf_len);
  120. status = SALRET_ERROR;
  121. break;
  122. }
  123. }
  124. bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_len,
  125. (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  126. if (bte_res != BTE_SUCCESS) {
  127. dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
  128. status = SALRET_ERROR;
  129. break;
  130. }
  131. }
  132. if (buf_base != NULL) {
  133. kfree(buf_base);
  134. }
  135. if (status != SALRET_OK) {
  136. rp_pa = 0;
  137. }
  138. dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
  139. return rp_pa;
  140. }
/*
 * Fill the partition reserved page with the information needed by
 * other partitions to discover we are alive and establish initial
 * communications.
 *
 * Returns a pointer to the local reserved page, or NULL on any failure.
 * Ordering matters: rp->vars_pa is set last, as that is what signals
 * remote partitions that our reserved page is fully initialized.
 */
struct xpc_rsvd_page *
xpc_rsvd_page_init(void)
{
	struct xpc_rsvd_page *rp;
	AMO_t *amos_page;
	u64 rp_pa, nasid_array = 0;
	int i, ret;

	/* get the local reserved page's address */

	/* preemption disabled so smp_processor_id() stays valid for the call */
	preempt_disable();
	rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
	preempt_enable();
	if (rp_pa == 0) {
		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
		return NULL;
	}
	rp = (struct xpc_rsvd_page *) __va(rp_pa);

	/* sanity check: SAL should have stamped our own partid into the page */
	if (rp->partid != sn_partition_id) {
		dev_err(xpc_part, "the reserved page's partid of %d should be "
			"%d\n", rp->partid, sn_partition_id);
		return NULL;
	}

	rp->version = XPC_RP_VERSION;

	/* establish the actual sizes of the nasid masks */
	if (rp->SAL_version == 1) {
		/* SAL_version 1 didn't set the nasids_size field */
		rp->nasids_size = 128;
	}
	xp_nasid_mask_bytes = rp->nasids_size;
	xp_nasid_mask_words = xp_nasid_mask_bytes / 8;

	/* setup the pointers to the various items in the reserved page */
	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
	xpc_vars = XPC_RP_VARS(rp);
	xpc_vars_part = XPC_RP_VARS_PART(rp);

	/*
	 * Before clearing xpc_vars, see if a page of AMOs had been previously
	 * allocated. If not we'll need to allocate one and set permissions
	 * so that cross-partition AMOs are allowed.
	 *
	 * The allocated AMO page needs MCA reporting to remain disabled after
	 * XPC has unloaded. To make this work, we keep a copy of the pointer
	 * to this page (i.e., amos_page) in the struct xpc_vars structure,
	 * which is pointed to by the reserved page, and re-use that saved copy
	 * on subsequent loads of XPC. This AMO page is never freed, and its
	 * memory protections are never restricted.
	 */
	if ((amos_page = xpc_vars->amos_page) == NULL) {
		amos_page = (AMO_t *) TO_AMO(uncached_alloc_page(0));
		if (amos_page == NULL) {
			dev_err(xpc_part, "can't allocate page of AMOs\n");
			return NULL;
		}

		/*
		 * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems
		 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
		 */
		if (!enable_shub_wars_1_1()) {
			ret = sn_change_memprotect(ia64_tpa((u64) amos_page),
					PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1,
					&nasid_array);
			if (ret != 0) {
				dev_err(xpc_part, "can't change memory "
					"protections\n");
				/* give back the uncached page we just got */
				uncached_free_page(__IA64_UNCACHED_OFFSET |
						   TO_PHYS((u64) amos_page));
				return NULL;
			}
		}
	} else if (!IS_AMO_ADDRESS((u64) amos_page)) {
		/*
		 * EFI's XPBOOT can also set amos_page in the reserved page,
		 * but it happens to leave it as an uncached physical address
		 * and we need it to be an uncached virtual, so we'll have to
		 * convert it.
		 */
		if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) {
			dev_err(xpc_part, "previously used amos_page address "
				"is bad = 0x%p\n", (void *) amos_page);
			return NULL;
		}
		amos_page = (AMO_t *) TO_AMO((u64) amos_page);
	}

	/* clear xpc_vars */
	memset(xpc_vars, 0, sizeof(struct xpc_vars));

	xpc_vars->version = XPC_V_VERSION;
	xpc_vars->act_nasid = cpuid_to_nasid(0);
	xpc_vars->act_phys_cpuid = cpu_physical_id(0);
	xpc_vars->vars_part_pa = __pa(xpc_vars_part);
	xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page);
	xpc_vars->amos_page = amos_page;	/* save for next load of XPC */

	/* clear xpc_vars_part */
	memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
							XP_MAX_PARTITIONS);

	/* initialize the activate IRQ related AMO variables */
	for (i = 0; i < xp_nasid_mask_words; i++) {
		(void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
	}

	/* initialize the engaged remote partitions related AMO variables */
	(void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
	(void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);

	/* timestamp of when reserved page was setup by XPC */
	rp->stamp = CURRENT_TIME;

	/*
	 * This signifies to the remote partition that our reserved
	 * page is initialized.
	 */
	rp->vars_pa = __pa(xpc_vars);

	return rp;
}
/*
 * Change protections to allow IPI operations (and AMO operations on
 * Shub 1.1 systems).
 *
 * Saves the startup SH_IPI_ACCESS register values (so they can later be
 * restored by xpc_restrict_IPI_ops()) and then opens IPI access fully
 * (-1UL == all bits set) on every online node.
 */
void
xpc_allow_IPI_ops(void)
{
	int node;
	int nasid;

	// >>> Change SH_IPI_ACCESS code to use SAL call once it is available.

	if (is_shub2()) {
		/* SHUB2 has four IPI access registers; save current values */
		xpc_sh2_IPI_access0 =
			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
		xpc_sh2_IPI_access1 =
			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
		xpc_sh2_IPI_access2 =
			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
		xpc_sh2_IPI_access3 =
			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));

		/* open up IPI access on each online node */
		for_each_online_node(node) {
			nasid = cnodeid_to_nasid(node);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
								-1UL);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
								-1UL);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
								-1UL);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
								-1UL);
		}

	} else {
		/* SHUB1 has a single IPI access register; save current value */
		xpc_sh1_IPI_access =
			(u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS));

		for_each_online_node(node) {
			nasid = cnodeid_to_nasid(node);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
								-1UL);

			/*
			 * Since the BIST collides with memory operations on
			 * SHUB 1.1 sn_change_memprotect() cannot be used.
			 */
			if (enable_shub_wars_1_1()) {
				/* open up everything */
				/* save node's protection vector first */
				xpc_prot_vec[node] = (u64) HUB_L((u64 *)
						GLOBAL_MMR_ADDR(nasid,
						SH1_MD_DQLP_MMR_DIR_PRIVEC0));
				HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
						SH1_MD_DQLP_MMR_DIR_PRIVEC0),
								-1UL);
				HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
						SH1_MD_DQRP_MMR_DIR_PRIVEC0),
								-1UL);
			}
		}
	}
}
/*
 * Restrict protections to disallow IPI operations (and AMO operations on
 * Shub 1.1 systems).
 *
 * Writes back the SH_IPI_ACCESS (and, for the Shub 1.1 workaround, the
 * protection vector) values that xpc_allow_IPI_ops() saved at startup.
 */
void
xpc_restrict_IPI_ops(void)
{
	int node;
	int nasid;

	// >>> Change SH_IPI_ACCESS code to use SAL call once it is available.

	if (is_shub2()) {
		for_each_online_node(node) {
			nasid = cnodeid_to_nasid(node);
			/* restore all four saved SHUB2 access registers */
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
							xpc_sh2_IPI_access0);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
							xpc_sh2_IPI_access1);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
							xpc_sh2_IPI_access2);
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
							xpc_sh2_IPI_access3);
		}

	} else {
		for_each_online_node(node) {
			nasid = cnodeid_to_nasid(node);
			/* restore the single saved SHUB1 access register */
			HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
							xpc_sh1_IPI_access);

			if (enable_shub_wars_1_1()) {
				/* restore the node's saved protection vector */
				HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
						SH1_MD_DQLP_MMR_DIR_PRIVEC0),
							xpc_prot_vec[node]);
				HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
						SH1_MD_DQRP_MMR_DIR_PRIVEC0),
							xpc_prot_vec[node]);
			}
		}
	}
}
/*
 * At periodic intervals, scan through all active partitions and ensure
 * their heartbeat is still active. If not, the partition is deactivated.
 *
 * Uses the shared xpc_remote_copy_buffer as scratch space for the BTE
 * pull of each remote partition's vars.
 */
void
xpc_check_remote_hb(void)
{
	struct xpc_vars *remote_vars;
	struct xpc_partition *part;
	partid_t partid;
	bte_result_t bres;

	remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;

	/* partid 0 is unused; skip ourselves and non-active partitions */
	for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {

		if (xpc_exiting) {
			break;
		}

		if (partid == sn_partition_id) {
			continue;
		}

		part = &xpc_partitions[partid];

		if (part->act_state == XPC_P_INACTIVE ||
				part->act_state == XPC_P_DEACTIVATING) {
			continue;
		}

		/* pull the remote_hb cache line */
		bres = xp_bte_copy(part->remote_vars_pa,
					ia64_tpa((u64) remote_vars),
					XPC_RP_VARS_SIZE,
					(BTE_NOTIFY | BTE_WACQUIRE), NULL);
		if (bres != BTE_SUCCESS) {
			/* can't even read the remote vars; take it down */
			XPC_DEACTIVATE_PARTITION(part,
						xpc_map_bte_errors(bres));
			continue;
		}

		dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
			" = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
			partid, remote_vars->heartbeat, part->last_heartbeat,
			remote_vars->heartbeat_offline,
			remote_vars->heartbeating_to_mask);

		/*
		 * Dead if the heartbeat count hasn't advanced since last scan
		 * (and the remote hasn't declared itself deliberately
		 * offline), or if it is no longer heartbeating to us.
		 */
		if (((remote_vars->heartbeat == part->last_heartbeat) &&
			(remote_vars->heartbeat_offline == 0)) ||
			     !xpc_hb_allowed(sn_partition_id, remote_vars)) {

			XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
			continue;
		}

		part->last_heartbeat = remote_vars->heartbeat;
	}
}
/*
 * Get a copy of a portion of the remote partition's rsvd page.
 *
 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
 * is large enough to contain a copy of their reserved page header and
 * part_nasids mask.
 *
 * If discovered_nasids is non-NULL, the remote partition's part_nasids mask
 * is OR'ed into it as a side effect. *remote_rp_pa receives the physical
 * address of the remote reserved page. Returns xpcSuccess or a specific
 * failure code.
 */
static enum xpc_retval
xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
		struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
{
	int bres, i;

	/* get the reserved page's physical address */

	*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
	if (*remote_rp_pa == 0) {
		return xpcNoRsvdPageAddr;
	}

	/* pull over the reserved page header and part_nasids mask */

	bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
				XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
				(BTE_NOTIFY | BTE_WACQUIRE), NULL);
	if (bres != BTE_SUCCESS) {
		return xpc_map_bte_errors(bres);
	}

	if (discovered_nasids != NULL) {
		u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);

		/* accumulate the nasids claimed by this remote partition */
		for (i = 0; i < xp_nasid_mask_words; i++) {
			discovered_nasids[i] |= remote_part_nasids[i];
		}
	}

	/* check that the partid is for another partition */

	if (remote_rp->partid < 1 ||
			remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
		return xpcInvalidPartid;
	}

	if (remote_rp->partid == sn_partition_id) {
		return xpcLocalPartid;
	}

	/* only the major version number must match for us to interoperate */
	if (XPC_VERSION_MAJOR(remote_rp->version) !=
					XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
		return xpcBadVersion;
	}

	return xpcSuccess;
}
  441. /*
  442. * Get a copy of the remote partition's XPC variables from the reserved page.
  443. *
  444. * remote_vars points to a buffer that is cacheline aligned for BTE copies and
  445. * assumed to be of size XPC_RP_VARS_SIZE.
  446. */
  447. static enum xpc_retval
  448. xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
  449. {
  450. int bres;
  451. if (remote_vars_pa == 0) {
  452. return xpcVarsNotSet;
  453. }
  454. /* pull over the cross partition variables */
  455. bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
  456. XPC_RP_VARS_SIZE,
  457. (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  458. if (bres != BTE_SUCCESS) {
  459. return xpc_map_bte_errors(bres);
  460. }
  461. if (XPC_VERSION_MAJOR(remote_vars->version) !=
  462. XPC_VERSION_MAJOR(XPC_V_VERSION)) {
  463. return xpcBadVersion;
  464. }
  465. return xpcSuccess;
  466. }
  467. /*
  468. * Update the remote partition's info.
  469. */
  470. static void
  471. xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
  472. struct timespec *remote_rp_stamp, u64 remote_rp_pa,
  473. u64 remote_vars_pa, struct xpc_vars *remote_vars)
  474. {
  475. part->remote_rp_version = remote_rp_version;
  476. dev_dbg(xpc_part, " remote_rp_version = 0x%016lx\n",
  477. part->remote_rp_version);
  478. part->remote_rp_stamp = *remote_rp_stamp;
  479. dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
  480. part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
  481. part->remote_rp_pa = remote_rp_pa;
  482. dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
  483. part->remote_vars_pa = remote_vars_pa;
  484. dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
  485. part->remote_vars_pa);
  486. part->last_heartbeat = remote_vars->heartbeat;
  487. dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
  488. part->last_heartbeat);
  489. part->remote_vars_part_pa = remote_vars->vars_part_pa;
  490. dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
  491. part->remote_vars_part_pa);
  492. part->remote_act_nasid = remote_vars->act_nasid;
  493. dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
  494. part->remote_act_nasid);
  495. part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
  496. dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
  497. part->remote_act_phys_cpuid);
  498. part->remote_amos_page_pa = remote_vars->amos_page_pa;
  499. dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
  500. part->remote_amos_page_pa);
  501. part->remote_vars_version = remote_vars->version;
  502. dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
  503. part->remote_vars_version);
  504. }
/*
 * Prior code has determined the nasid which generated an IPI. Inspect
 * that nasid to determine if its partition needs to be activated or
 * deactivated.
 *
 * A partition is consider "awaiting activation" if our partition
 * flags indicate it is not active and it has a heartbeat. A
 * partition is considered "awaiting deactivation" if our partition
 * flags indicate it is active but it has no heartbeat or it is not
 * sending its heartbeat to us.
 *
 * To determine the heartbeat, the remote nasid must have a properly
 * initialized reserved page.
 */
static void
xpc_identify_act_IRQ_req(int nasid)
{
	struct xpc_rsvd_page *remote_rp;
	struct xpc_vars *remote_vars;
	u64 remote_rp_pa;
	u64 remote_vars_pa;
	int remote_rp_version;
	int reactivate = 0;
	int stamp_diff;
	struct timespec remote_rp_stamp = { 0, 0 };
	partid_t partid;
	struct xpc_partition *part;
	enum xpc_retval ret;

	/* pull over the reserved page structure */

	remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;

	ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
	if (ret != xpcSuccess) {
		dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
			"which sent interrupt, reason=%d\n", nasid, ret);
		return;
	}

	remote_vars_pa = remote_rp->vars_pa;
	remote_rp_version = remote_rp->version;
	if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
		remote_rp_stamp = remote_rp->stamp;
	}
	partid = remote_rp->partid;
	part = &xpc_partitions[partid];

	/* pull over the cross partition variables */

	/* NOTE: remote_vars reuses the same buffer, clobbering remote_rp;
	 * all needed rp fields were copied out above */
	remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;

	ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
	if (ret != xpcSuccess) {

		dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
			"which sent interrupt, reason=%d\n", nasid, ret);

		XPC_DEACTIVATE_PARTITION(part, ret);
		return;
	}

	part->act_IRQ_rcvd++;

	dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
		"%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
		remote_vars->heartbeat, remote_vars->heartbeating_to_mask);

	/* simplest case: partition is fully inactive, so just activate it */
	if (xpc_partition_disengaged(part) &&
			part->act_state == XPC_P_INACTIVE) {

		xpc_update_partition_info(part, remote_rp_version,
					&remote_rp_stamp, remote_rp_pa,
					remote_vars_pa, remote_vars);

		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
			if (xpc_partition_disengage_requested(1UL << partid)) {
				/*
				 * Other side is waiting on us to disengage,
				 * even though we already have.
				 */
				return;
			}
		} else {
			/* other side doesn't support disengage requests */
			xpc_clear_partition_disengage_request(1UL << partid);
		}

		xpc_activate_partition(part);
		return;
	}

	/* past this point, we've previously seen this partition alive */
	DBUG_ON(part->remote_rp_version == 0);
	DBUG_ON(part->remote_vars_version == 0);

	if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
		DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
							remote_vars_version));

		if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
			DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
								version));

			/* see if the other side rebooted */
			if (part->remote_amos_page_pa ==
					remote_vars->amos_page_pa &&
						xpc_hb_allowed(sn_partition_id,
								remote_vars)) {
				/* doesn't look that way, so ignore the IPI */
				return;
			}
		}

		/*
		 * Other side rebooted and previous XPC didn't support the
		 * disengage request, so we don't need to do anything special.
		 */

		xpc_update_partition_info(part, remote_rp_version,
						&remote_rp_stamp, remote_rp_pa,
						remote_vars_pa, remote_vars);
		part->reactivate_nasid = nasid;
		XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
		return;
	}

	DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));

	if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));

		/*
		 * Other side rebooted and previous XPC did support the
		 * disengage request, but the new one doesn't.
		 */

		xpc_clear_partition_engaged(1UL << partid);
		xpc_clear_partition_disengage_request(1UL << partid);

		xpc_update_partition_info(part, remote_rp_version,
						&remote_rp_stamp, remote_rp_pa,
						remote_vars_pa, remote_vars);
		reactivate = 1;

	} else {
		DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));

		/* a newer rp stamp than the one we cached means a reboot */
		stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
							&remote_rp_stamp);
		if (stamp_diff != 0) {
			DBUG_ON(stamp_diff >= 0);

			/*
			 * Other side rebooted and the previous XPC did support
			 * the disengage request, as does the new one.
			 */

			DBUG_ON(xpc_partition_engaged(1UL << partid));
			DBUG_ON(xpc_partition_disengage_requested(1UL <<
								partid));

			xpc_update_partition_info(part, remote_rp_version,
						&remote_rp_stamp, remote_rp_pa,
						remote_vars_pa, remote_vars);
			reactivate = 1;
		}
	}

	if (part->disengage_request_timeout > 0 &&
					!xpc_partition_disengaged(part)) {
		/* still waiting on other side to disengage from us */
		return;
	}

	if (reactivate) {
		part->reactivate_nasid = nasid;
		XPC_DEACTIVATE_PARTITION(part, xpcReactivating);

	} else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
			xpc_partition_disengage_requested(1UL << partid)) {
		XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
	}
}
/*
 * Loop through the activation AMO variables and process any bits
 * which are set. Each bit indicates a nasid sending a partition
 * activation or deactivation request.
 *
 * Return #of IRQs detected.
 */
int
xpc_identify_act_IRQ_sender(void)
{
	int word, bit;
	u64 nasid_mask;
	u64 nasid;			/* remote nasid */
	int n_IRQs_detected = 0;
	AMO_t *act_amos;

	act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;

	/* scan through act AMO variable looking for non-zero entries */
	for (word = 0; word < xp_nasid_mask_words; word++) {

		if (xpc_exiting) {
			break;
		}

		/* xpc_IPI_receive reads AND clears the AMO word atomically */
		nasid_mask = xpc_IPI_receive(&act_amos[word]);
		if (nasid_mask == 0) {
			/* no IRQs from nasids in this variable */
			continue;
		}

		dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
			nasid_mask);

		/*
		 * If this nasid has been added to the machine since
		 * our partition was reset, this will retain the
		 * remote nasid in our reserved pages machine mask.
		 * This is used in the event of module reload.
		 */
		xpc_mach_nasids[word] |= nasid_mask;

		/* locate the nasid(s) which sent interrupts */

		for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
			if (nasid_mask & (1UL << bit)) {
				n_IRQs_detected++;
				nasid = XPC_NASID_FROM_W_B(word, bit);
				dev_dbg(xpc_part, "interrupt from nasid %ld\n",
					nasid);
				xpc_identify_act_IRQ_req(nasid);
			}
		}
	}
	return n_IRQs_detected;
}
/*
 * See if the other side has responded to a partition disengage request
 * from us.
 *
 * Returns non-zero once the remote partition is no longer engaged with us,
 * either because it responded or because our request timed out (in which
 * case the remote side is presumed dead and forcibly marked disengaged).
 * Also performs the associated cleanup: cancels the disengage timer and
 * wakes the channel manager if deactivation is still in progress.
 */
int
xpc_partition_disengaged(struct xpc_partition *part)
{
	partid_t partid = XPC_PARTID(part);
	int disengaged;

	disengaged = (xpc_partition_engaged(1UL << partid) == 0);
	if (part->disengage_request_timeout) {
		if (!disengaged) {
			if (jiffies < part->disengage_request_timeout) {
				/* timelimit hasn't been reached yet */
				return 0;
			}

			/*
			 * Other side hasn't responded to our disengage
			 * request in a timely fashion, so assume it's dead.
			 */

			dev_info(xpc_part, "disengage from remote partition %d "
				"timed out\n", partid);
			xpc_disengage_request_timedout = 1;
			xpc_clear_partition_engaged(1UL << partid);
			disengaged = 1;
		}
		part->disengage_request_timeout = 0;

		/* cancel the timer function, provided it's not us */

		if (!in_interrupt()) {
			/* in_interrupt() means we may BE the timer callback;
			 * a sync cancel would then deadlock */
			del_singleshot_timer_sync(&part->
						disengage_request_timer);
		}

		DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
					part->act_state != XPC_P_INACTIVE);
		if (part->act_state != XPC_P_INACTIVE) {
			xpc_wakeup_channel_mgr(part);
		}

		if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
			xpc_cancel_partition_disengage_request(part);
		}
	}
	return disengaged;
}
  745. /*
  746. * Mark specified partition as active.
  747. */
  748. enum xpc_retval
  749. xpc_mark_partition_active(struct xpc_partition *part)
  750. {
  751. unsigned long irq_flags;
  752. enum xpc_retval ret;
  753. dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
  754. spin_lock_irqsave(&part->act_lock, irq_flags);
  755. if (part->act_state == XPC_P_ACTIVATING) {
  756. part->act_state = XPC_P_ACTIVE;
  757. ret = xpcSuccess;
  758. } else {
  759. DBUG_ON(part->reason == xpcSuccess);
  760. ret = part->reason;
  761. }
  762. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  763. return ret;
  764. }
/*
 * Notify XPC that the partition is down.
 *
 * @line:   caller's __LINE__, recorded via XPC_SET_REASON for debugging.
 * @part:   partition to bring down.
 * @reason: why the partition is being deactivated.
 *
 * Transitions the partition to DEACTIVATING under act_lock. If it is
 * already INACTIVE or DEACTIVATING, only the recorded reason may be
 * updated (xpcReactivating always wins; it also overrides a stale
 * xpcUnloading). A first-time deactivation sends a disengage request
 * to the remote side (when supported) and arms a timeout timer.
 */
void
xpc_deactivate_partition(const int line, struct xpc_partition *part,
				enum xpc_retval reason)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&part->act_lock, irq_flags);

	if (part->act_state == XPC_P_INACTIVE) {
		/* already down; just record the (possibly new) reason */
		XPC_SET_REASON(part, reason, line);
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		if (reason == xpcReactivating) {
			/* we interrupt ourselves to reactivate partition */
			xpc_IPI_send_reactivate(part);
		}
		return;
	}
	if (part->act_state == XPC_P_DEACTIVATING) {
		/* deactivation in progress; upgrade the reason if warranted */
		if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
					reason == xpcReactivating) {
			XPC_SET_REASON(part, reason, line);
		}
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		return;
	}

	part->act_state = XPC_P_DEACTIVATING;
	XPC_SET_REASON(part, reason, line);

	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
		/* ask the remote side to disengage and notify it via IPI */
		xpc_request_partition_disengage(part);
		xpc_IPI_send_disengage(part);

		/* set a timelimit on the disengage request */
		part->disengage_request_timeout = jiffies +
					(xpc_disengage_request_timelimit * HZ);
		part->disengage_request_timer.expires =
					part->disengage_request_timeout;
		add_timer(&part->disengage_request_timer);
	}

	dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
		XPC_PARTID(part), reason);

	xpc_partition_going_down(part, reason);
}
  808. /*
  809. * Mark specified partition as inactive.
  810. */
  811. void
  812. xpc_mark_partition_inactive(struct xpc_partition *part)
  813. {
  814. unsigned long irq_flags;
  815. dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
  816. XPC_PARTID(part));
  817. spin_lock_irqsave(&part->act_lock, irq_flags);
  818. part->act_state = XPC_P_INACTIVE;
  819. spin_unlock_irqrestore(&part->act_lock, irq_flags);
  820. part->remote_rp_pa = 0;
  821. }
/*
 * SAL has provided a partition and machine mask.  The partition mask
 * contains a bit for each even nasid in our partition.  The machine
 * mask contains a bit for each even nasid in the entire machine.
 *
 * Using those two bit arrays, we can determine which nasids are
 * known in the machine.  Each should also have a reserved page
 * initialized if they are available for partitioning.
 *
 * For every such nasid this pulls over its reserved page and XPC
 * variables, registers the remote AMO page with SAL, and sends an
 * activation IPI to the owning partition.
 */
void
xpc_discovery(void)
{
	void *remote_rp_base;
	struct xpc_rsvd_page *remote_rp;
	struct xpc_vars *remote_vars;
	u64 remote_rp_pa;
	u64 remote_vars_pa;
	int region;
	int region_size;
	int max_regions;
	int nasid;
	struct xpc_rsvd_page *rp;
	partid_t partid;
	struct xpc_partition *part;
	u64 *discovered_nasids;
	enum xpc_retval ret;

	/* scratch buffer for a remote reserved page + its nasid mask */
	remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
						xp_nasid_mask_bytes,
						GFP_KERNEL, &remote_rp_base);
	if (remote_rp == NULL) {
		return;
	}
	/* the same buffer is reused to hold the remote XPC variables */
	remote_vars = (struct xpc_vars *) remote_rp;

	/* bitmap of nasids already claimed by a discovered partition */
	discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
							GFP_KERNEL);
	if (discovered_nasids == NULL) {
		kfree(remote_rp_base);
		return;
	}
	rp = (struct xpc_rsvd_page *) xpc_rsvd_page;

	/*
	 * The term 'region' in this context refers to the minimum number of
	 * nodes that can comprise an access protection grouping. The access
	 * protection is in regards to memory, IOI and IPI.
	 */
	max_regions = 64;
	region_size = sn_region_size;

	/* intentional fallthroughs: each case doubles max_regions */
	switch (region_size) {
	case 128:
		max_regions *= 2;
		/* fallthrough */
	case 64:
		max_regions *= 2;
		/* fallthrough */
	case 32:
		max_regions *= 2;
		/* region sizes below 32 only occur on shub2 hardware */
		region_size = 16;
		DBUG_ON(!is_shub2());
	}

	for (region = 0; region < max_regions; region++) {

		if ((volatile int) xpc_exiting) {
			break;
		}

		dev_dbg(xpc_part, "searching region %d\n", region);

		/* only even nasids exist, hence the stride of 2 */
		for (nasid = (region * region_size * 2);
		     nasid < ((region + 1) * region_size * 2);
		     nasid += 2) {

			if ((volatile int) xpc_exiting) {
				break;
			}

			dev_dbg(xpc_part, "checking nasid %d\n", nasid);

			if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
				dev_dbg(xpc_part, "PROM indicates Nasid %d is "
					"part of the local partition; skipping "
					"region\n", nasid);
				/* a local nasid disqualifies the region */
				break;
			}

			if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
				dev_dbg(xpc_part, "PROM indicates Nasid %d was "
					"not on Numa-Link network at reset\n",
					nasid);
				continue;
			}

			if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
				dev_dbg(xpc_part, "Nasid %d is part of a "
					"partition which was previously "
					"discovered\n", nasid);
				continue;
			}

			/* pull over the reserved page structure */

			ret = xpc_get_remote_rp(nasid, discovered_nasids,
					      remote_rp, &remote_rp_pa);
			if (ret != xpcSuccess) {
				dev_dbg(xpc_part, "unable to get reserved page "
					"from nasid %d, reason=%d\n", nasid,
					ret);

				if (ret == xpcLocalPartid) {
					break;
				}
				continue;
			}

			remote_vars_pa = remote_rp->vars_pa;

			partid = remote_rp->partid;
			part = &xpc_partitions[partid];

			/* pull over the cross partition variables */

			ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
			if (ret != xpcSuccess) {
				dev_dbg(xpc_part, "unable to get XPC variables "
					"from nasid %d, reason=%d\n", nasid,
					ret);

				XPC_DEACTIVATE_PARTITION(part, ret);
				continue;
			}

			if (part->act_state != XPC_P_INACTIVE) {
				dev_dbg(xpc_part, "partition %d on nasid %d is "
					"already activating\n", partid, nasid);
				break;
			}

			/*
			 * Register the remote partition's AMOs with SAL so it
			 * can handle and cleanup errors within that address
			 * range should the remote partition go down. We don't
			 * unregister this range because it is difficult to
			 * tell when outstanding writes to the remote partition
			 * are finished and thus when it is thus safe to
			 * unregister. This should not result in wasted space
			 * in the SAL xp_addr_region table because we should
			 * get the same page for remote_act_amos_pa after
			 * module reloads and system reboots.
			 */
			if (sn_register_xp_addr_region(
					    remote_vars->amos_page_pa,
							PAGE_SIZE, 1) < 0) {
				dev_dbg(xpc_part, "partition %d failed to "
					"register xp_addr region 0x%016lx\n",
					partid, remote_vars->amos_page_pa);

				XPC_SET_REASON(part, xpcPhysAddrRegFailed,
						__LINE__);
				break;
			}

			/*
			 * The remote nasid is valid and available.
			 * Send an interrupt to that nasid to notify
			 * it that we are ready to begin activation.
			 */
			dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
				"nasid %d, phys_cpuid 0x%x\n",
				remote_vars->amos_page_pa,
				remote_vars->act_nasid,
				remote_vars->act_phys_cpuid);

			if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
								version)) {
				/* reset any stale engagement state first */
				part->remote_amos_page_pa =
						remote_vars->amos_page_pa;
				xpc_mark_partition_disengaged(part);
				xpc_cancel_partition_disengage_request(part);
			}
			xpc_IPI_send_activate(remote_vars);
		}
	}

	kfree(discovered_nasids);
	kfree(remote_rp_base);
}
  983. /*
  984. * Given a partid, get the nasids owned by that partition from the
  985. * remote partition's reserved page.
  986. */
  987. enum xpc_retval
  988. xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
  989. {
  990. struct xpc_partition *part;
  991. u64 part_nasid_pa;
  992. int bte_res;
  993. part = &xpc_partitions[partid];
  994. if (part->remote_rp_pa == 0) {
  995. return xpcPartitionDown;
  996. }
  997. memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
  998. part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
  999. bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
  1000. xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
  1001. return xpc_map_bte_errors(bte_res);
  1002. }