uv_bau.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. /*
  2. * This file is subject to the terms and conditions of the GNU General Public
  3. * License. See the file "COPYING" in the main directory of this archive
  4. * for more details.
  5. *
  6. * SGI UV Broadcast Assist Unit definitions
  7. *
  8. * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
  9. */
  10. #ifndef _ASM_X86_UV_UV_BAU_H
  11. #define _ASM_X86_UV_UV_BAU_H
  12. #include <linux/bitmap.h>
  /* number of bits in one byte */
  #define BITSPERBYTE                     8
  14. /*
  15. * Broadcast Assist Unit messaging structures
  16. *
  17. * Selective Broadcast activations are induced by software action
  18. * specifying a particular 8-descriptor "set" via a 6-bit index written
  19. * to an MMR.
  20. * Thus there are 64 unique 512-byte sets of SB descriptors - one set for
  21. * each 6-bit index value. These descriptor sets are mapped in sequence
  22. * starting with set 0 located at the address specified in the
  23. * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
  24. * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
  25. *
  26. * We will use one set for sending BAU messages from each of the
  27. * cpu's on the uvhub.
  28. *
  29. * TLB shootdown will use the first of the 8 descriptors of each set.
  30. * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
  31. */
  /* sizing limits for the uvhub and its BAU descriptors */
  #define MAX_CPUS_PER_UVHUB              64
  #define MAX_CPUS_PER_SOCKET             32
  #define UV_ADP_SIZE                     64 /* hardware-provided max. */
  #define UV_CPUS_PER_ACT_STATUS          32 /* hardware-provided max. */
  #define UV_ITEMS_PER_DESCRIPTOR         8

  /* the 'throttle' to prevent the hardware stay-busy bug */
  #define MAX_BAU_CONCURRENT              3

  #define UV_ACT_STATUS_MASK              0x3
  #define UV_ACT_STATUS_SIZE              2
  #define UV_DISTRIBUTION_SIZE            256
  #define UV_SW_ACK_NPENDING              8
  #define UV_NET_ENDPOINT_INTD            0x38
  #define UV_DESC_BASE_PNODE_SHIFT        49
  #define UV_PAYLOADQ_PNODE_SHIFT         49

  /* names of the statistics and tunables files */
  #define UV_PTC_BASENAME                 "sgi_uv/ptc_statistics"
  #define UV_BAU_BASENAME                 "sgi_uv/bau_tunables"
  #define UV_BAU_TUNABLES_DIR             "sgi_uv"
  #define UV_BAU_TUNABLES_FILE            "bau_tunables"
  #define WHITESPACE                      " \t\n"

  /* physical address of x, masked with uv_mmask */
  #define uv_physnodeaddr(x)              (__pa((unsigned long)(x)) & uv_mmask)

  #define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT      15
  #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT   16
  /*
   * [19:16] SOFT_ACK timeout period:
   *   bit 19 = 1 is urgency 7, bits 17:16 = 1 is the multiplier
   */
  #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD         0x0000000009UL
  #define BAU_MISC_CONTROL_MULT_MASK              3

  #define UVH_AGING_PRESCALE_SEL                  0x000000b000UL
  /* [30:28] URGENCY_7: an index into a table of times */
  #define BAU_URGENCY_7_SHIFT                     28
  #define BAU_URGENCY_7_MASK                      7

  #define UVH_TRANSACTION_TIMEOUT                 0x000000b200UL
  /* [45:40] BAU transaction timeout select - a multiplier */
  #define BAU_TRANS_SHIFT                         40
  #define BAU_TRANS_MASK                          0x3f
  /*
   * Descriptor status values, as found in the bits of
   * UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
   */
  #define DESC_STATUS_IDLE                        0
  #define DESC_STATUS_ACTIVE                      1
  #define DESC_STATUS_DESTINATION_TIMEOUT         2
  #define DESC_STATUS_SOURCE_TIMEOUT              3
  /* delay for 'plugged' timeout retries, in microseconds */
  #define PLUGGED_DELAY                   10

  /*
   * Thresholds at which to use IPI to free resources
   */
  /* after this # consecutive 'plugged' timeouts, use IPI to release resources */
  #define PLUGSB4RESET                    100
  /* after this many consecutive timeouts, use IPI to release resources */
  #define TIMEOUTSB4RESET                 1
  /* at this number of uses of IPI to release resources, give up the request */
  #define IPI_RESET_LIMIT                 1
  /* after this # consecutive successes, bump up the throttle if it was lowered */
  #define COMPLETE_THRESHOLD              5

  #define UV_LB_SUBNODEID                 0x10
  /* number of entries in the destination side payload queue */
  #define DEST_Q_SIZE                     20

  /* number of destination side software ack resources */
  #define DEST_NUM_RESOURCES              8
  /* completion statuses for sending a TLB flush message */
  #define FLUSH_RETRY_PLUGGED             1
  #define FLUSH_RETRY_TIMEOUT             2
  #define FLUSH_GIVEUP                    3
  #define FLUSH_COMPLETE                  4
  /*
   * Tuning the action taken when the numalink network is extremely delayed
   */
  /* 'long' response time, in microseconds */
  #define CONGESTED_RESPONSE_US           1000
  /* long delays are averaged over this many broadcasts */
  #define CONGESTED_REPS                  10
  /* time for the bau to be disabled, in seconds */
  #define CONGESTED_PERIOD                30
  109. /*
  110. * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
  111. * If the 'multilevel' flag in the header portion of the descriptor
  112. * has been set to 0, then endpoint multi-unicast mode is selected.
  113. * The distribution specification (32 bytes) is interpreted as a 256-bit
  114. * distribution vector. Adjacent bits correspond to consecutive even numbered
  115. * nodeIDs. The result of adding the index of a given bit to the 15-bit
  116. * 'base_dest_nasid' field of the header corresponds to the
  117. * destination nodeID associated with that specified bit.
  118. */
  119. struct bau_target_uvhubmask {
  120. unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)];
  121. };
  /*
   * Mask of the cpus on a uvhub, held in a single unsigned long.
   * (During initialization we need to check that unsigned long has
   * enough bits for the max. number of cpus per uvhub.)
   */
  struct bau_local_cpumask {
      unsigned long bits;
  };
  /*
   * Payload: 16 bytes (128 bits) (bytes 0x20-0x2f of descriptor)
   *
   * Only 12 bytes (96 bits) of the payload area are usable.
   * An additional 3 bytes (bits 27:4) of the header address are carried
   * to the next bytes of the destination payload queue, and an additional
   * 2 bytes of the header Suppl_A field are also carried to the
   * destination payload queue.
   * But the first byte of the Suppl_A becomes bits 127:120 (the 16th byte)
   * of the destination payload queue, which is written by the hardware
   * with the s/w ack resource bit vector.
   * [ effective message contents (16 bytes (128 bits) maximum), not counting
   *   the s/w ack bit vector ]
   *
   * The payload is software-defined for INTD transactions.
   */
  struct bau_msg_payload {
      /* 64 bits: signifies a page or all TLB's of the cpu */
      unsigned long   address;
      /* 16 bits: filled in by sender */
      unsigned short  sending_cpu;
      /* 16 bits: filled in by destination */
      unsigned short  acknowledge_count;
      /* 32 bits: not usable */
      unsigned int    reserved1:32;
  };
  /*
   * Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
   * see table 4.2.3.0.1 in broadcast_assist spec.
   *
   * The bit ranges quoted below are positions within the 128-bit header.
   * Field order and widths define the hardware format; do not reorder.
   */
  struct bau_msg_header {
      /* bits 5:0 - must be 0x10, for the LB */
      unsigned int dest_subnodeid:6;
      /* bits 20:6 - nasid of the first bit in uvhub map */
      unsigned int base_dest_nasid:15;
      /* bits 28:21 - message type; 0x38: SN3net EndPoint Message */
      unsigned int command:8;
      /* bits 31:29 - must be zero */
      unsigned int rsvd_1:3;

      /* int will align on 32 bits */

      /* bits 40:32 - must be zero */
      unsigned int rsvd_2:9;
      /*
       * bits 56:41 (Suppl_A) - message sequence number;
       * becomes bytes 16-17 of msg
       */
      unsigned int sequence:16;

      /*
       * Address field (96:57) is never used as an address
       * (these are address bits 42:3)
       */

      /* bit 57 - must be zero */
      unsigned int rsvd_3:1;

      /*
       * Address bits 27:4 are payload: these next 24 bits (58-81)
       * become bytes 12-14 of msg.
       */

      /* bits 65:58 land in byte 12 */
      /* bit 58 - sent as 0 by the source to byte 12 */
      unsigned int replied_to:1;
      /* bits 61:59 - software type of the message */
      unsigned int msg_type:3;
      /* bit 62 - message canceled, resource to be freed */
      unsigned int canceled:1;
      /* bit 63 - not currently used */
      unsigned int payload_1a:1;
      /* bits 65:64 - not currently used */
      unsigned int payload_1b:2;

      /* bits 73:66 land in byte 13 */
      /* bits 71:66 - not currently used */
      unsigned int payload_1ca:6;
      /* bits 73:72 - not currently used */
      unsigned int payload_1c:2;

      /* bits 81:74 land in byte 14 */
      /* bits 79:74 - not currently used */
      unsigned int payload_1d:6;
      /* bits 81:80 - not currently used */
      unsigned int payload_1e:2;

      /* bits 88:82 - must be zero */
      unsigned int rsvd_4:7;
      /*
       * bit 89 - software acknowledge flag: INTD transactions at
       * destination are to wait for software acknowledge
       */
      unsigned int sw_ack_flag:1;
      /* bits 95:90 - must be zero */
      unsigned int rsvd_5:6;

      /* bits 100:96 - must be zero */
      unsigned int rsvd_6:5;
      /* bit 101 - if 1, interrupt both sockets on the uvhub */
      unsigned int int_both:1;
      /* bits 104:102 - usually zero */
      unsigned int fairness:3;
      /*
       * bit 105 - multi-level multicast format;
       * 0 for TLB: endpoint multi-unicast messages
       */
      unsigned int multilevel:1;
      /* bit 106 - next descriptor is part of this activation */
      unsigned int chaining:1;
      /* bits 127:107 - must be zero */
      unsigned int rsvd_7:21;
  };
  /* software message types; see the msg_type field */
  #define MSG_NOOP                        0
  #define MSG_REGULAR                     1
  #define MSG_RETRY                       2
  229. /*
  230. * The activation descriptor:
  231. * The format of the message to send, plus all accompanying control
  232. * Should be 64 bytes
  233. */
  234. struct bau_desc {
  235. struct bau_target_uvhubmask distribution;
  236. /*
  237. * message template, consisting of header and payload:
  238. */
  239. struct bau_msg_header header;
  240. struct bau_msg_payload payload;
  241. };
  242. /*
  243. * -payload-- ---------header------
  244. * bytes 0-11 bits 41-56 bits 58-81
  245. * A B (2) C (3)
  246. *
  247. * A/B/C are moved to:
  248. * A C B
  249. * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector)
  250. * ------------payload queue-----------
  251. */
  /*
   * The payload queue on the destination side is an array of these.
   *
   * With BAU_MISC_CONTROL set for software acknowledge mode, the messages
   * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17
   * bytes of usable data, including the sw ack vector in byte 15
   * (bits 127:120)
   * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from
   *  sw_ack_vector and payload_2)
   * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software
   *  Acknowledge Processing) also selects 32 byte (17 bytes usable) payload
   *  operation."
   */
  struct bau_payload_queue_entry {
      /* bytes 0-7 (64 bits): signifies a page or all TLB's of the cpu */
      unsigned long   address;
      /* bytes 8-9 (16 bits): cpu that sent the message */
      unsigned short  sending_cpu;
      /* bytes 10-11 (16 bits): filled in by destination */
      unsigned short  acknowledge_count;

      /* these next 3 bytes come from bits 58-81 of the message header */

      /* byte 12 */
      unsigned short  replied_to:1;   /* sent as 0 by the source */
      unsigned short  msg_type:3;     /* software message type */
      unsigned short  canceled:1;     /* sent as 0 by the source */
      unsigned short  unused1:3;      /* not currently using */
      /* byte 13: not currently using */
      unsigned char   unused2a;
      /* byte 14: not currently using */
      unsigned char   unused2;

      /* byte 15 (bits 127:120): filled in by the hardware */
      unsigned char   sw_ack_vector;
      /* bytes 16-17: message sequence number */
      unsigned short  sequence;
      /* bytes 18-19: not currently using */
      unsigned char   unused4[2];
      /* bytes 20-23 (32 bits, aligned): filled in at destination */
      int             number_of_cpus;
      /* bytes 24-31: not using */
      unsigned char   unused5[8];
  };
  /*
   * Describes one message in the payload queue: a pointer to the entry,
   * its slot numbers, and the first/last entries of the queue.
   */
  struct msg_desc {
      struct bau_payload_queue_entry  *msg;
      int                             msg_slot;
      int                             sw_ack_slot;
      struct bau_payload_queue_entry  *va_queue_first;
      struct bau_payload_queue_entry  *va_queue_last;
  };
  /* argument block for a reset request; carries only the sender */
  struct reset_args {
      int     sender;
  };
  /*
   * UV TLB shootdown statistics; this structure is allocated per_cpu.
   */
  struct ptc_stats {
      /*
       * sender statistics
       */
      unsigned long   s_giveup;           /* fall backs to IPI-style flushes */
      unsigned long   s_requestor;        /* shootdown requests */
      unsigned long   s_stimeout;         /* source side timeouts */
      unsigned long   s_dtimeout;         /* destination side timeouts */
      unsigned long   s_time;             /* time spent in sending side */
      unsigned long   s_retriesok;        /* successful retries */
      unsigned long   s_ntargcpu;         /* total number of cpu's targeted */
      unsigned long   s_ntargself;        /* times the sending cpu was targeted */
      unsigned long   s_ntarglocals;      /* targets of cpus on the local blade */
      unsigned long   s_ntargremotes;     /* targets of cpus on remote blades */
      unsigned long   s_ntarglocaluvhub;  /* targets of the local hub */
      unsigned long   s_ntargremoteuvhub; /* remote hubs targeted */
      unsigned long   s_ntarguvhub;       /* total number of uvhubs targeted */
      unsigned long   s_ntarguvhub16;     /* times target hubs >= 16 */
      unsigned long   s_ntarguvhub8;      /* times target hubs >= 8 */
      unsigned long   s_ntarguvhub4;      /* times target hubs >= 4 */
      unsigned long   s_ntarguvhub2;      /* times target hubs >= 2 */
      unsigned long   s_ntarguvhub1;      /* times target hubs == 1 */
      unsigned long   s_resets_plug;      /* ipi-style resets from plug state */
      unsigned long   s_resets_timeout;   /* ipi-style resets from timeouts */
      unsigned long   s_busy;             /* status stayed busy past s/w timer */
      unsigned long   s_throttles;        /* waits in throttle */
      unsigned long   s_retry_messages;   /* retry broadcasts */
      unsigned long   s_bau_reenabled;    /* for bau enable/disable */
      unsigned long   s_bau_disabled;     /* for bau enable/disable */

      /*
       * destination statistics
       */
      unsigned long   d_alltlb;           /* times all tlb's on this cpu were flushed */
      unsigned long   d_onetlb;           /* times just one tlb on this cpu was flushed */
      unsigned long   d_multmsg;          /* interrupts with multiple messages */
      unsigned long   d_nomsg;            /* interrupts with no message */
      unsigned long   d_time;             /* time spent on destination side */
      unsigned long   d_requestee;        /* messages processed */
      unsigned long   d_retries;          /* retry messages processed */
      unsigned long   d_canceled;         /* messages canceled by retries */
      unsigned long   d_nocanceled;       /* retries that found nothing to cancel */
      unsigned long   d_resets;           /* ipi-style requests processed */
      unsigned long   d_rcanceled;        /* messages canceled by resets */
  };
  /* pairs a uvhub number with a pnode number */
  struct hub_and_pnode {
      short   uvhub;
      short   pnode;
  };
  349. /*
  350. * one per-cpu; to locate the software tables
  351. */
  352. struct bau_control {
  353. struct bau_desc *descriptor_base;
  354. struct bau_payload_queue_entry *va_queue_first;
  355. struct bau_payload_queue_entry *va_queue_last;
  356. struct bau_payload_queue_entry *bau_msg_head;
  357. struct bau_control *uvhub_master;
  358. struct bau_control *socket_master;
  359. struct ptc_stats *statp;
  360. unsigned long timeout_interval;
  361. unsigned long set_bau_on_time;
  362. atomic_t active_descriptor_count;
  363. int plugged_tries;
  364. int timeout_tries;
  365. int ipi_attempts;
  366. int conseccompletes;
  367. int baudisabled;
  368. int set_bau_off;
  369. short cpu;
  370. short osnode;
  371. short uvhub_cpu;
  372. short uvhub;
  373. short cpus_in_socket;
  374. short cpus_in_uvhub;
  375. short partition_base_pnode;
  376. unsigned short message_number;
  377. unsigned short uvhub_quiesce;
  378. short socket_acknowledge_count[DEST_Q_SIZE];
  379. cycles_t send_message;
  380. spinlock_t uvhub_lock;
  381. spinlock_t queue_lock;
  382. /* tunables */
  383. int max_bau_concurrent;
  384. int max_bau_concurrent_constant;
  385. int plugged_delay;
  386. int plugsb4reset;
  387. int timeoutsb4reset;
  388. int ipi_reset_limit;
  389. int complete_threshold;
  390. int congested_response_us;
  391. int congested_reps;
  392. int congested_period;
  393. cycles_t period_time;
  394. long period_requests;
  395. struct hub_and_pnode *target_hub_and_pnode;
  396. };
  397. static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp)
  398. {
  399. return constant_test_bit(uvhub, &dstp->bits[0]);
  400. }
  401. static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp)
  402. {
  403. __set_bit(pnode, &dstp->bits[0]);
  404. }
  405. static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp,
  406. int nbits)
  407. {
  408. bitmap_zero(&dstp->bits[0], nbits);
  409. }
  410. static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp)
  411. {
  412. return bitmap_weight((unsigned long *)&dstp->bits[0],
  413. UV_DISTRIBUTION_SIZE);
  414. }
  415. static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
  416. {
  417. bitmap_zero(&dstp->bits, nbits);
  418. }
  /*
   * Test whether bit 'cpu' is set in a struct bau_local_cpumask (passed
   * by value/lvalue, not by pointer).
   *
   * test_bit() takes the address of the bitmap word; 'bits' is a plain
   * unsigned long, so its address must be passed rather than its value.
   */
  #define cpubit_isset(cpu, bau_local_cpumask) \
      test_bit((cpu), &(bau_local_cpumask).bits)
  /*
   * Defined outside this header.
   * NOTE(review): presumably the BAU message and timeout interrupt entry
   * points - confirm at their definitions.
   */
  void uv_bau_message_intr1(void);
  void uv_bau_timeout_intr1(void);
  /* a short counter operated on by the atomic_*_short() helpers */
  struct atomic_short {
      short   counter;
  };
  426. /**
  427. * atomic_read_short - read a short atomic variable
  428. * @v: pointer of type atomic_short
  429. *
  430. * Atomically reads the value of @v.
  431. */
  432. static inline int atomic_read_short(const struct atomic_short *v)
  433. {
  434. return v->counter;
  435. }
  436. /**
  437. * atomic_add_short_return - add and return a short int
  438. * @i: short value to add
  439. * @v: pointer of type atomic_short
  440. *
  441. * Atomically adds @i to @v and returns @i + @v
  442. */
  443. static inline int atomic_add_short_return(short i, struct atomic_short *v)
  444. {
  445. short __i = i;
  446. asm volatile(LOCK_PREFIX "xaddw %0, %1"
  447. : "+r" (i), "+m" (v->counter)
  448. : : "memory");
  449. return i + __i;
  450. }
  451. #endif /* _ASM_X86_UV_UV_BAU_H */