ipath_rc.c
/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/io.h>

#include "ipath_verbs.h"
#include "ipath_kernel.h"

/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
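
/*
 * Re-initialize an SGE state from a work request so that sending can resume
 * at @psn: skip the bytes already covered by earlier PSNs and return the
 * remaining length.
 */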
static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe,
		       u32 psn, u32 pmtu)
{
	u32 len;

	len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
	ss->sge = wqe->sg_list[0];
	ss->sg_list = wqe->sg_list + 1;
	ss->num_sge = wqe->wr.num_sge;
	ipath_skip_sge(ss, len);
	return wqe->length - len;
}

/**
 * ipath_init_restart - initialize the qp->s_sge after a restart
 * @qp: the QP whose SGE we're restarting
 * @wqe: the work queue to initialize the QP's SGE from
 *
 * The QP s_lock should be held and interrupts disabled.
 */
static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
{
	struct ipath_ibdev *dev;

	qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn,
				ib_mtu_enum_to_int(qp->path_mtu));
	dev = to_idev(qp->ibqp.device);
	spin_lock(&dev->pending_lock);
	if (list_empty(&qp->timerwait))
		list_add_tail(&qp->timerwait,
			      &dev->pending[dev->pending_index]);
	spin_unlock(&dev->pending_lock);
}

/**
 * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
 * @dev: the device for this QP
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 *
 * Return 1 if constructed; otherwise, return 0.
 * Note that we are on the responder's side of the QP context.
 * Note the QP s_lock must be held.
 */
static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
			     struct ipath_other_headers *ohdr, u32 pmtu)
{
	struct ipath_ack_entry *e;
	u32 hwords;
	u32 len;
	u32 bth0;
	u32 bth2;

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;

	switch (qp->s_ack_state) {
	case OP(RDMA_READ_RESPONSE_LAST):
	case OP(RDMA_READ_RESPONSE_ONLY):
	case OP(ATOMIC_ACKNOWLEDGE):
		/*
		 * We can increment the tail pointer now that the last
		 * response has been sent instead of only being
		 * constructed.
		 */
		if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
			qp->s_tail_ack_queue = 0;
		/* FALLTHROUGH */
	case OP(SEND_ONLY):
	case OP(ACKNOWLEDGE):
		/* Check for no next entry in the queue. */
		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
			if (qp->s_flags & IPATH_S_ACK_PENDING)
				goto normal;
			qp->s_ack_state = OP(ACKNOWLEDGE);
			goto bail;
		}

		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
		if (e->opcode == OP(RDMA_READ_REQUEST)) {
			/* Copy SGE state in case we need to resend */
			qp->s_ack_rdma_sge = e->rdma_sge;
			qp->s_cur_sge = &qp->s_ack_rdma_sge;
			len = e->rdma_sge.sge.sge_length;
			if (len > pmtu) {
				len = pmtu;
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
			} else {
				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
				e->sent = 1;
			}
			ohdr->u.aeth = ipath_compute_aeth(qp);
			hwords++;
			qp->s_ack_rdma_psn = e->psn;
			bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
		} else {
			/* COMPARE_SWAP or FETCH_ADD */
			qp->s_cur_sge = NULL;
			len = 0;
			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
			ohdr->u.at.aeth = ipath_compute_aeth(qp);
			ohdr->u.at.atomic_ack_eth[0] =
				cpu_to_be32(e->atomic_data >> 32);
			ohdr->u.at.atomic_ack_eth[1] =
				cpu_to_be32(e->atomic_data);
			hwords += sizeof(ohdr->u.at) / sizeof(u32);
			bth2 = e->psn;
			e->sent = 1;
		}
		bth0 = qp->s_ack_state << 24;
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_READ_RESPONSE_MIDDLE):
		len = qp->s_ack_rdma_sge.sge.sge_length;
		if (len > pmtu)
			len = pmtu;
		else {
			ohdr->u.aeth = ipath_compute_aeth(qp);
			hwords++;
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
			qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
		}
		bth0 = qp->s_ack_state << 24;
		bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
		break;

	default:
	normal:
		/*
		 * Send a regular ACK.
		 * Set the s_ack_state so we wait until after sending
		 * the ACK before setting s_ack_state to ACKNOWLEDGE
		 * (see above).
		 */
		qp->s_ack_state = OP(SEND_ONLY);
		qp->s_flags &= ~IPATH_S_ACK_PENDING;
		qp->s_cur_sge = NULL;
		if (qp->s_nak_state)
			ohdr->u.aeth =
				cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
					    (qp->s_nak_state <<
					     IPATH_AETH_CREDIT_SHIFT));
		else
			ohdr->u.aeth = ipath_compute_aeth(qp);
		hwords++;
		len = 0;
		bth0 = OP(ACKNOWLEDGE) << 24;
		bth2 = qp->s_ack_psn & IPATH_PSN_MASK;
	}
	qp->s_hdrwords = hwords;
	qp->s_cur_size = len;
	ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
	return 1;

bail:
	return 0;
}

/**
 * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
 * @qp: a pointer to the QP
 *
 * Return 1 if constructed; otherwise, return 0.
 */
int ipath_make_rc_req(struct ipath_qp *qp)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ipath_other_headers *ohdr;
	struct ipath_sge_state *ss;
	struct ipath_swqe *wqe;
	u32 hwords;
	u32 len;
	u32 bth0;
	u32 bth2;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	char newreq;
	unsigned long flags;
	int ret = 0;

	ohdr = &qp->s_hdr.u.oth;
	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
		ohdr = &qp->s_hdr.u.l.oth;

	/*
	 * The lock is needed to synchronize between the sending tasklet,
	 * the receive interrupt handler, and timeout resends.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);

	/* Sending responses has higher priority than sending requests. */
	if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
	     (qp->s_flags & IPATH_S_ACK_PENDING) ||
	     qp->s_ack_state != OP(ACKNOWLEDGE)) &&
	    ipath_make_rc_ack(dev, qp, ohdr, pmtu))
		goto done;

	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
	    qp->s_rnr_timeout || qp->s_wait_credit)
		goto bail;

	/* Limit the number of packets sent without an ACK. */
	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
		qp->s_wait_credit = 1;
		dev->n_rc_stalls++;
		goto bail;
	}

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;
	bth0 = 1 << 22; /* Set M bit */

	/* Send a request. */
	wqe = get_swqe_ptr(qp, qp->s_cur);
	switch (qp->s_state) {
	default:
		/*
		 * Resend an old request or start a new one.
		 *
		 * We keep track of the current SWQE so that
		 * we don't reset the "furthest progress" state
		 * if we need to back up.
		 */
		newreq = 0;
		if (qp->s_cur == qp->s_tail) {
			/* Check if send work queue is empty. */
			if (qp->s_tail == qp->s_head)
				goto bail;
			/*
			 * If a fence is requested, wait for previous
			 * RDMA read and atomic operations to finish.
			 */
			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
			    qp->s_num_rd_atomic) {
				qp->s_flags |= IPATH_S_FENCE_PENDING;
				goto bail;
			}
			wqe->psn = qp->s_next_psn;
			newreq = 1;
		}
		/*
		 * Note that we have to be careful not to modify the
		 * original work request since we may need to resend
		 * it.
		 */
		len = wqe->length;
		ss = &qp->s_sge;
		bth2 = 0;
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			/* If no credit, return. */
			if (qp->s_lsn != (u32) -1 &&
			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
				goto bail;
			wqe->lpsn = wqe->psn;
			if (len > pmtu) {
				wqe->lpsn += (len - 1) / pmtu;
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND)
				qp->s_state = OP(SEND_ONLY);
			else {
				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
			}
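			/* Set the solicited event (SE) bit in the BTH if requested. */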
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
			bth2 = 1 << 31; /* Request ACK. */
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_WRITE:
			if (newreq && qp->s_lsn != (u32) -1)
				qp->s_lsn++;
			/* FALLTHROUGH */
		case IB_WR_RDMA_WRITE_WITH_IMM:
			/* If no credit, return. */
			if (qp->s_lsn != (u32) -1 &&
			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
				goto bail;
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / sizeof(u32);
			wqe->lpsn = wqe->psn;
			if (len > pmtu) {
				wqe->lpsn += (len - 1) / pmtu;
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
				qp->s_state = OP(RDMA_WRITE_ONLY);
			else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after RETH */
				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= 1 << 23;
			}
			bth2 = 1 << 31; /* Request ACK. */
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_READ:
			/*
			 * Don't allow more operations to be started
			 * than the QP limits allow.
			 */
			if (newreq) {
				if (qp->s_num_rd_atomic >=
				    qp->s_max_rd_atomic) {
					qp->s_flags |= IPATH_S_RDMAR_PENDING;
					goto bail;
				}
				qp->s_num_rd_atomic++;
				if (qp->s_lsn != (u32) -1)
					qp->s_lsn++;
				/*
				 * Adjust s_next_psn to count the
				 * expected number of responses.
				 */
				if (len > pmtu)
					qp->s_next_psn += (len - 1) / pmtu;
				wqe->lpsn = qp->s_next_psn++;
			}
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			qp->s_state = OP(RDMA_READ_REQUEST);
			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
			ss = NULL;
			len = 0;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			/*
			 * Don't allow more operations to be started
			 * than the QP limits allow.
			 */
			if (newreq) {
				if (qp->s_num_rd_atomic >=
				    qp->s_max_rd_atomic) {
					qp->s_flags |= IPATH_S_RDMAR_PENDING;
					goto bail;
				}
				qp->s_num_rd_atomic++;
				if (qp->s_lsn != (u32) -1)
					qp->s_lsn++;
				wqe->lpsn = wqe->psn;
			}
			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
				qp->s_state = OP(COMPARE_SWAP);
				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
					wqe->wr.wr.atomic.swap);
				ohdr->u.atomic_eth.compare_data = cpu_to_be64(
					wqe->wr.wr.atomic.compare_add);
			} else {
				qp->s_state = OP(FETCH_ADD);
				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
					wqe->wr.wr.atomic.compare_add);
				ohdr->u.atomic_eth.compare_data = 0;
			}
			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
				wqe->wr.wr.atomic.remote_addr >> 32);
			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
				wqe->wr.wr.atomic.remote_addr);
			ohdr->u.atomic_eth.rkey = cpu_to_be32(
				wqe->wr.wr.atomic.rkey);
			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
			ss = NULL;
			len = 0;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		default:
			goto bail;
		}
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_len = wqe->length;
		if (newreq) {
			qp->s_tail++;
			if (qp->s_tail >= qp->s_size)
				qp->s_tail = 0;
		}
		bth2 |= qp->s_psn & IPATH_PSN_MASK;
		if (wqe->wr.opcode == IB_WR_RDMA_READ)
			qp->s_psn = wqe->lpsn + 1;
		else {
			qp->s_psn++;
			if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
				qp->s_next_psn = qp->s_psn;
		}
		/*
		 * Put the QP on the pending list so lost ACKs will cause
		 * a retry.  More than one request can be pending so the
		 * QP may already be on the dev->pending list.
		 */
		spin_lock(&dev->pending_lock);
		if (list_empty(&qp->timerwait))
			list_add_tail(&qp->timerwait,
				      &dev->pending[dev->pending_index]);
		spin_unlock(&dev->pending_lock);
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		/*
		 * This case can only happen if a send is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		/* FALLTHROUGH */
	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND)
			qp->s_state = OP(SEND_LAST);
		else {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= 1 << 23;
		bth2 |= 1 << 31; /* Request ACK. */
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_LAST):
		/*
		 * This case can only happen if a RDMA write is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
			qp->s_state = OP(RDMA_WRITE_LAST);
		else {
			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
		}
		bth2 |= 1 << 31; /* Request ACK. */
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/*
		 * This case can only happen if a RDMA read is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
		ohdr->u.rc.reth.vaddr =
			cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
		ohdr->u.rc.reth.rkey =
			cpu_to_be32(wqe->wr.wr.rdma.rkey);
		ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
		qp->s_state = OP(RDMA_READ_REQUEST);
		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = NULL;
		len = 0;
		qp->s_cur++;
		if (qp->s_cur == qp->s_size)
			qp->s_cur = 0;
		break;
	}
	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
		bth2 |= 1 << 31; /* Request ACK. */
	qp->s_len -= len;
	qp->s_hdrwords = hwords;
	qp->s_cur_sge = ss;
	qp->s_cur_size = len;
	ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done:
	ret = 1;
bail:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}

/**
 * send_rc_ack - Construct an ACK packet and send it
 * @qp: a pointer to the QP
 *
 * This is called from ipath_rc_rcv() and only uses the receive
 * side QP state.
 * Note that RDMA reads and atomics are handled in the
 * send side QP state and tasklet.
 */
static void send_rc_ack(struct ipath_qp *qp)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ipath_devdata *dd;
	u16 lrh0;
	u32 bth0;
	u32 hwords;
	u32 __iomem *piobuf;
	struct ipath_ib_header hdr;
	struct ipath_other_headers *ohdr;
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);

	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
	if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
	    (qp->s_flags & IPATH_S_ACK_PENDING) ||
	    qp->s_ack_state != OP(ACKNOWLEDGE))
		goto queue_ack;

	spin_unlock_irqrestore(&qp->s_lock, flags);

	dd = dev->dd;
	piobuf = ipath_getpiobuf(dd, 0, NULL);
	if (!piobuf) {
		/*
		 * We are out of PIO buffers at the moment.
		 * Pass responsibility for sending the ACK to the
		 * send tasklet so that when a PIO buffer becomes
		 * available, the ACK is sent ahead of other outgoing
		 * packets.
		 */
		spin_lock_irqsave(&qp->s_lock, flags);
		goto queue_ack;
	}

	/* Construct the header. */
	ohdr = &hdr.u.oth;
	lrh0 = IPATH_LRH_BTH;
	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
	hwords = 6;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		hwords += ipath_make_grh(dev, &hdr.u.l.grh,
					 &qp->remote_ah_attr.grh,
					 hwords, 0);
		ohdr = &hdr.u.l.oth;
		lrh0 = IPATH_LRH_GRH;
	}
	/* read pkey_index w/o lock (it's atomic) */
	bth0 = ipath_get_pkey(dd, qp->s_pkey_index) |
		(OP(ACKNOWLEDGE) << 24) | (1 << 22);
	if (qp->r_nak_state)
		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
					   (qp->r_nak_state <<
					    IPATH_AETH_CREDIT_SHIFT));
	else
		ohdr->u.aeth = ipath_compute_aeth(qp);
	lrh0 |= qp->remote_ah_attr.sl << 4;
	hdr.lrh[0] = cpu_to_be16(lrh0);
	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
	hdr.lrh[3] = cpu_to_be16(dd->ipath_lid);
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
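
	/* The first write is the length word: hwords plus one for the ICRC. */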
	writeq(hwords + 1, piobuf);

	if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
		u32 *hdrp = (u32 *) &hdr;

		ipath_flush_wc();
		__iowrite32_copy(piobuf + 2, hdrp, hwords - 1);
		ipath_flush_wc();
		__raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
	} else
		__iowrite32_copy(piobuf + 2, (u32 *) &hdr, hwords);

	ipath_flush_wc();

	dev->n_unicast_xmit++;
	goto done;

queue_ack:
	dev->n_rc_qacks++;
	qp->s_flags |= IPATH_S_ACK_PENDING;
	qp->s_nak_state = qp->r_nak_state;
	qp->s_ack_psn = qp->r_ack_psn;
	spin_unlock_irqrestore(&qp->s_lock, flags);

	/* Call ipath_do_rc_send() in another thread. */
	tasklet_hi_schedule(&qp->s_task);

done:
	return;
}

/**
 * reset_psn - reset the QP state to send starting from PSN
 * @qp: the QP
 * @psn: the packet sequence number to restart at
 *
 * This is called from ipath_rc_rcv() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held.
 */
static void reset_psn(struct ipath_qp *qp, u32 psn)
{
	u32 n = qp->s_last;
	struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
	u32 opcode;

	qp->s_cur = n;

	/*
	 * If we are starting the request from the beginning,
	 * let the normal send code handle initialization.
	 */
	if (ipath_cmp24(psn, wqe->psn) <= 0) {
		qp->s_state = OP(SEND_LAST);
		goto done;
	}

	/* Find the work request opcode corresponding to the given PSN. */
	opcode = wqe->wr.opcode;
	for (;;) {
		int diff;

		if (++n == qp->s_size)
			n = 0;
		if (n == qp->s_tail)
			break;
		wqe = get_swqe_ptr(qp, n);
		diff = ipath_cmp24(psn, wqe->psn);
		if (diff < 0)
			break;
		qp->s_cur = n;
		/*
		 * If we are starting the request from the beginning,
		 * let the normal send code handle initialization.
		 */
		if (diff == 0) {
			qp->s_state = OP(SEND_LAST);
			goto done;
		}
		opcode = wqe->wr.opcode;
	}

	/*
	 * Set the state to restart in the middle of a request.
	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
	 * See ipath_do_rc_send().
	 */
	switch (opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
		break;

	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
		break;

	case IB_WR_RDMA_READ:
		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		break;

	default:
		/*
		 * This case shouldn't happen since there is only
		 * one PSN per request.
		 */
		qp->s_state = OP(SEND_LAST);
	}
done:
	qp->s_psn = psn;
}

/**
 * ipath_restart_rc - back up requester to resend the last un-ACKed request
 * @qp: the QP to restart
 * @psn: packet sequence number for the request
 * @wc: the work completion request
 *
 * The QP s_lock should be held and interrupts disabled.
 */
void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
{
	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
	struct ipath_ibdev *dev;

	if (qp->s_retry == 0) {
		wc->wr_id = wqe->wr.wr_id;
		wc->status = IB_WC_RETRY_EXC_ERR;
		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		wc->vendor_err = 0;
		wc->byte_len = 0;
		wc->qp = &qp->ibqp;
		wc->imm_data = 0;
		wc->src_qp = qp->remote_qpn;
		wc->wc_flags = 0;
		wc->pkey_index = 0;
		wc->slid = qp->remote_ah_attr.dlid;
		wc->sl = qp->remote_ah_attr.sl;
		wc->dlid_path_bits = 0;
		wc->port_num = 0;
		ipath_sqerror_qp(qp, wc);
		goto bail;
	}
	qp->s_retry--;

	/*
	 * Remove the QP from the timeout queue.
	 * Note: it may already have been removed by ipath_ib_timer().
	 */
	dev = to_idev(qp->ibqp.device);
	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	spin_unlock(&dev->pending_lock);

	if (wqe->wr.opcode == IB_WR_RDMA_READ)
		dev->n_rc_resends++;
	else
		dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;

	reset_psn(qp, psn);
	tasklet_hi_schedule(&qp->s_task);

bail:
	return;
}

static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
{
	if (qp->s_last_psn != psn) {
		qp->s_last_psn = psn;
		if (qp->s_wait_credit) {
			qp->s_wait_credit = 0;
			tasklet_hi_schedule(&qp->s_task);
		}
	}
}

/**
 * do_rc_ack - process an incoming RC ACK
 * @qp: the QP the ACK came in on
 * @aeth: the ACK extended transport header value
 * @psn: the packet sequence number of the ACK
 * @opcode: the opcode of the request that resulted in the ACK
 * @val: the data returned by an atomic response, if any
 *
 * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held and interrupts disabled.
 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
 */
static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
		     u64 val)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_wc wc;
	struct ipath_swqe *wqe;
	int ret = 0;
	u32 ack_psn;
	int diff;

	/*
	 * Remove the QP from the timeout queue (or RNR timeout queue).
	 * If ipath_ib_timer() has already removed it,
	 * it's OK since we hold the QP s_lock and ipath_restart_rc()
	 * just won't find anything to restart if we ACK everything.
	 */
	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	spin_unlock(&dev->pending_lock);

	/*
	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
	 * requests and implicitly NAK RDMA read and atomic requests issued
	 * before the NAK'ed request.  The MSN won't include the NAK'ed
	 * request but will include the ACK'ed request(s).
	 */
	ack_psn = psn;
	if (aeth >> 29)
		ack_psn--;
	wqe = get_swqe_ptr(qp, qp->s_last);

	/*
	 * The MSN might be for a later WQE than the PSN indicates so
	 * only complete WQEs that the PSN finishes.
	 */
	while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
		/*
		 * RDMA_READ_RESPONSE_ONLY is a special case since
		 * we want to generate completion events for everything
		 * before the RDMA read, copy the data, then generate
		 * the completion for the read.
		 */
		if (wqe->wr.opcode == IB_WR_RDMA_READ &&
		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
		    diff == 0) {
			ret = 1;
			goto bail;
		}
		/*
		 * If this request is a RDMA read or atomic, and the ACK is
		 * for a later operation, this ACK NAKs the RDMA read or
		 * atomic.  In other words, only a RDMA_READ_LAST or ONLY
		 * can ACK a RDMA read and likewise for atomic ops.  Note
		 * that the NAK case can only happen if relaxed ordering is
		 * used and requests are sent after an RDMA read or atomic
		 * is sent but before the response is received.
		 */
		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
		     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
			/*
			 * The last valid PSN seen is the previous
			 * request's.
			 */
			update_last_psn(qp, wqe->psn - 1);
			/* Retry this request. */
			ipath_restart_rc(qp, wqe->psn, &wc);
			/*
			 * No need to process the ACK/NAK since we are
			 * restarting an earlier request.
			 */
			goto bail;
		}
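		/* For completed atomic ops, return the responder's result to the local buffer. */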
		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
			*(u64 *) wqe->sg_list[0].vaddr = val;
		if (qp->s_num_rd_atomic &&
		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
			qp->s_num_rd_atomic--;
			/* Restart sending task if fence is complete */
			if ((qp->s_flags & IPATH_S_FENCE_PENDING) &&
			    !qp->s_num_rd_atomic) {
				qp->s_flags &= ~IPATH_S_FENCE_PENDING;
				tasklet_hi_schedule(&qp->s_task);
			} else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
				qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
				tasklet_hi_schedule(&qp->s_task);
			}
		}
		/* Post a send completion queue entry if requested. */
		if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
			wc.wr_id = wqe->wr.wr_id;
			wc.status = IB_WC_SUCCESS;
			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
			wc.vendor_err = 0;
			wc.byte_len = wqe->length;
			wc.imm_data = 0;
			wc.qp = &qp->ibqp;
			wc.src_qp = qp->remote_qpn;
			wc.wc_flags = 0;
			wc.pkey_index = 0;
			wc.slid = qp->remote_ah_attr.dlid;
			wc.sl = qp->remote_ah_attr.sl;
			wc.dlid_path_bits = 0;
			wc.port_num = 0;
			ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
		}
		qp->s_retry = qp->s_retry_cnt;
		/*
		 * If we are completing a request which is in the process of
		 * being resent, we can stop resending it since we know the
		 * responder has already seen it.
		 */
		if (qp->s_last == qp->s_cur) {
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			qp->s_last = qp->s_cur;
			if (qp->s_last == qp->s_tail)
				break;
			wqe = get_swqe_ptr(qp, qp->s_cur);
			qp->s_state = OP(SEND_LAST);
			qp->s_psn = wqe->psn;
		} else {
			if (++qp->s_last >= qp->s_size)
				qp->s_last = 0;
			if (qp->s_last == qp->s_tail)
				break;
			wqe = get_swqe_ptr(qp, qp->s_last);
		}
	}
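
	/* The top three bits of the AETH encode the ACK type: ACK, RNR NAK, or NAK. */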
	switch (aeth >> 29) {
	case 0:		/* ACK */
		dev->n_rc_acks++;
		/* If this is a partial ACK, reset the retransmit timer. */
		if (qp->s_last != qp->s_tail) {
			spin_lock(&dev->pending_lock);
			if (list_empty(&qp->timerwait))
				list_add_tail(&qp->timerwait,
					      &dev->pending[dev->pending_index]);
			spin_unlock(&dev->pending_lock);
			/*
			 * If we get a partial ACK for a resent operation,
			 * we can stop resending the earlier packets and
			 * continue with the next packet the receiver wants.
			 */
			if (ipath_cmp24(qp->s_psn, psn) <= 0) {
				reset_psn(qp, psn + 1);
				tasklet_hi_schedule(&qp->s_task);
			}
		} else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
			qp->s_state = OP(SEND_LAST);
			qp->s_psn = psn + 1;
		}
		ipath_get_credit(qp, aeth);
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		qp->s_retry = qp->s_retry_cnt;
		update_last_psn(qp, psn);
		ret = 1;
		goto bail;

	case 1:		/* RNR NAK */
		dev->n_rnr_naks++;
		if (qp->s_last == qp->s_tail)
			goto bail;
		if (qp->s_rnr_retry == 0) {
			wc.status = IB_WC_RNR_RETRY_EXC_ERR;
			goto class_b;
		}
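		/* An RNR retry count of 7 means "retry forever", so don't decrement it. */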
		if (qp->s_rnr_retry_cnt < 7)
			qp->s_rnr_retry--;

		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);

		if (wqe->wr.opcode == IB_WR_RDMA_READ)
			dev->n_rc_resends++;
		else
			dev->n_rc_resends +=
				(qp->s_psn - psn) & IPATH_PSN_MASK;

		reset_psn(qp, psn);

		qp->s_rnr_timeout =
			ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
					   IPATH_AETH_CREDIT_MASK];
		ipath_insert_rnr_queue(qp);
		goto bail;

	case 3:		/* NAK */
		if (qp->s_last == qp->s_tail)
			goto bail;
		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);
		switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
			IPATH_AETH_CREDIT_MASK) {
		case 0:	/* PSN sequence error */
			dev->n_seq_naks++;
			/*
			 * Back up to the responder's expected PSN.
			 * Note that we might get a NAK in the middle of an
			 * RDMA READ response which terminates the RDMA
			 * READ.
			 */
			ipath_restart_rc(qp, psn, &wc);
			break;

		case 1:	/* Invalid Request */
			wc.status = IB_WC_REM_INV_REQ_ERR;
			dev->n_other_naks++;
			goto class_b;

		case 2:	/* Remote Access Error */
			wc.status = IB_WC_REM_ACCESS_ERR;
			dev->n_other_naks++;
			goto class_b;

		case 3:	/* Remote Operation Error */
			wc.status = IB_WC_REM_OP_ERR;
			dev->n_other_naks++;
		class_b:
			wc.wr_id = wqe->wr.wr_id;
			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
			wc.vendor_err = 0;
			wc.byte_len = 0;
			wc.qp = &qp->ibqp;
			wc.imm_data = 0;
			wc.src_qp = qp->remote_qpn;
			wc.wc_flags = 0;
			wc.pkey_index = 0;
			wc.slid = qp->remote_ah_attr.dlid;
			wc.sl = qp->remote_ah_attr.sl;
			wc.dlid_path_bits = 0;
			wc.port_num = 0;
			ipath_sqerror_qp(qp, &wc);
			break;

		default:
			/* Ignore other reserved NAK error codes */
			goto reserved;
		}
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		goto bail;

	default:		/* 2: reserved */
	reserved:
		/* Ignore reserved NAK codes. */
		goto bail;
	}

bail:
	return ret;
}

/**
 * ipath_rc_rcv_resp - process an incoming RC response packet
 * @dev: the device this packet came in on
 * @ohdr: the other headers for this packet
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP for this packet
 * @opcode: the opcode for this packet
 * @psn: the packet sequence number for this packet
 * @hdrsize: the header length
 * @pmtu: the path MTU
 * @header_in_data: true if part of the header data is in the data buffer
 *
 * This is called from ipath_rc_rcv() to process an incoming RC response
 * packet for the given QP.
 * Called at interrupt level.
 */
static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
				     struct ipath_other_headers *ohdr,
				     void *data, u32 tlen,
				     struct ipath_qp *qp,
				     u32 opcode,
				     u32 psn, u32 hdrsize, u32 pmtu,
				     int header_in_data)
{
	struct ipath_swqe *wqe;
	unsigned long flags;
	struct ib_wc wc;
	int diff;
	u32 pad;
	u32 aeth;
	u64 val;

	spin_lock_irqsave(&qp->s_lock, flags);

	/* Ignore invalid responses. */
	if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
		goto ack_done;

	/* Ignore duplicate responses. */
	diff = ipath_cmp24(psn, qp->s_last_psn);
	if (unlikely(diff <= 0)) {
		/* Update credits for "ghost" ACKs */
		if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
			if (!header_in_data)
				aeth = be32_to_cpu(ohdr->u.aeth);
			else {
				aeth = be32_to_cpu(((__be32 *) data)[0]);
				data += sizeof(__be32);
			}
			if ((aeth >> 29) == 0)
				ipath_get_credit(qp, aeth);
		}
		goto ack_done;
	}

	if (unlikely(qp->s_last == qp->s_tail))
		goto ack_done;
	wqe = get_swqe_ptr(qp, qp->s_last);

	switch (opcode) {
	case OP(ACKNOWLEDGE):
	case OP(ATOMIC_ACKNOWLEDGE):
	case OP(RDMA_READ_RESPONSE_FIRST):
		if (!header_in_data)
			aeth = be32_to_cpu(ohdr->u.aeth);
		else {
			aeth = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		}
		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
			if (!header_in_data) {
				__be32 *p = ohdr->u.at.atomic_ack_eth;

				val = ((u64) be32_to_cpu(p[0]) << 32) |
					be32_to_cpu(p[1]);
			} else
				val = be64_to_cpu(((__be64 *) data)[0]);
		} else
			val = 0;
		if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
			goto ack_done;
		hdrsize += 4;
		wqe = get_swqe_ptr(qp, qp->s_last);
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
		/*
		 * If this is a response to a resent RDMA read, we
		 * have to be careful to copy the data to the right
		 * location.
		 */
		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
						  wqe, psn, pmtu);
		goto read_middle;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/* no AETH, no ACK */
		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
			dev->n_rdma_seq++;
			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
			goto ack_done;
		}
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
	read_middle:
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto ack_len_err;
		if (unlikely(pmtu >= qp->s_rdma_read_len))
			goto ack_len_err;

		/* We got a response so update the timeout. */
		spin_lock(&dev->pending_lock);
		if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
			list_move_tail(&qp->timerwait,
				       &dev->pending[dev->pending_index]);
		spin_unlock(&dev->pending_lock);

		if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
			qp->s_retry = qp->s_retry_cnt;

		/*
		 * Update the RDMA receive state but do the copy w/o
		 * holding the locks and blocking interrupts.
		 */
		qp->s_rdma_read_len -= pmtu;
		update_last_psn(qp, psn);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu);
		goto bail;

	case OP(RDMA_READ_RESPONSE_ONLY):
		if (!header_in_data)
			aeth = be32_to_cpu(ohdr->u.aeth);
		else
			aeth = be32_to_cpu(((__be32 *) data)[0]);
		if (!do_rc_ack(qp, aeth, psn, opcode, 0))
			goto ack_done;
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/*
		 * Check that the data size is >= 0 && <= pmtu.
		 * Remember to account for the AETH header (4) and
		 * ICRC (4).
		 */
		if (unlikely(tlen < (hdrsize + pad + 8)))
			goto ack_len_err;
		/*
		 * If this is a response to a resent RDMA read, we
		 * have to be careful to copy the data to the right
		 * location.
		 */
		wqe = get_swqe_ptr(qp, qp->s_last);
		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
						  wqe, psn, pmtu);
		goto read_last;

	case OP(RDMA_READ_RESPONSE_LAST):
		/* ACKs READ req. */
		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
			dev->n_rdma_seq++;
			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
			goto ack_done;
		}
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/*
		 * Check that the data size is >= 1 && <= pmtu.
		 * Remember to account for the AETH header (4) and
		 * ICRC (4).
		 */
		if (unlikely(tlen <= (hdrsize + pad + 8)))
			goto ack_len_err;
	read_last:
		tlen -= hdrsize + pad + 8;
		if (unlikely(tlen != qp->s_rdma_read_len))
			goto ack_len_err;
		if (!header_in_data)
			aeth = be32_to_cpu(ohdr->u.aeth);
		else {
			aeth = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		}
		ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
		(void) do_rc_ack(qp, aeth, psn,
				 OP(RDMA_READ_RESPONSE_LAST), 0);
		goto ack_done;
	}

ack_done:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	goto bail;

ack_op_err:
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto ack_err;

ack_len_err:
	wc.status = IB_WC_LOC_LEN_ERR;
ack_err:
	wc.wr_id = wqe->wr.wr_id;
	wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
	wc.vendor_err = 0;
	wc.byte_len = 0;
	wc.imm_data = 0;
	wc.qp = &qp->ibqp;
	wc.src_qp = qp->remote_qpn;
	wc.wc_flags = 0;
	wc.pkey_index = 0;
	wc.slid = qp->remote_ah_attr.dlid;
	wc.sl = qp->remote_ah_attr.sl;
	wc.dlid_path_bits = 0;
	wc.port_num = 0;
	ipath_sqerror_qp(qp, &wc);
	spin_unlock_irqrestore(&qp->s_lock, flags);
bail:
	return;
}

/**
 * ipath_rc_rcv_error - process an incoming duplicate or error RC packet
 * @dev: the device this packet came in on
 * @ohdr: the other headers for this packet
 * @data: the packet data
 * @qp: the QP for this packet
 * @opcode: the opcode for this packet
 * @psn: the packet sequence number for this packet
 * @diff: the difference between the PSN and the expected PSN
 * @header_in_data: true if part of the header data is in the data buffer
 *
 * This is called from ipath_rc_rcv() to process an unexpected
 * incoming RC packet for the given QP.
 * Called at interrupt level.
 * Return 1 if no more processing is needed; otherwise return 0 to
 * schedule a response to be sent.
 */
static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
				     struct ipath_other_headers *ohdr,
				     void *data,
				     struct ipath_qp *qp,
				     u32 opcode,
				     u32 psn,
				     int diff,
				     int header_in_data)
{
	struct ipath_ack_entry *e;
	u8 i, prev;
	int old_req;
	unsigned long flags;

	if (diff > 0) {
		/*
		 * Packet sequence error.
		 * A NAK will ACK earlier sends and RDMA writes.
		 * Don't queue the NAK if we already sent one.
		 */
		if (!qp->r_nak_state) {
			qp->r_nak_state = IB_NAK_PSN_ERROR;
			/* Use the expected PSN. */
			qp->r_ack_psn = qp->r_psn;
			goto send_ack;
		}
		goto done;
	}

	/*
	 * Handle a duplicate request.  Don't re-execute SEND, RDMA
	 * write or atomic op.  Don't NAK errors, just silently drop
	 * the duplicate request.  Note that r_sge, r_len, and
	 * r_rcv_len may be in use so don't modify them.
	 *
	 * We are supposed to ACK the earliest duplicate PSN but we
	 * can coalesce an outstanding duplicate ACK.  We have to
	 * send the earliest so that RDMA reads can be restarted at
	 * the requester's expected PSN.
	 *
	 * First, find where this duplicate PSN falls within the
	 * ACKs previously sent.
	 */
	psn &= IPATH_PSN_MASK;
	e = NULL;
	old_req = 1;
	spin_lock_irqsave(&qp->s_lock, flags);
	for (i = qp->r_head_ack_queue; ; i = prev) {
		if (i == qp->s_tail_ack_queue)
			old_req = 0;
		if (i)
			prev = i - 1;
		else
			prev = IPATH_MAX_RDMA_ATOMIC;
		if (prev == qp->r_head_ack_queue) {
			e = NULL;
			break;
		}
		e = &qp->s_ack_queue[prev];
		if (!e->opcode) {
			e = NULL;
			break;
		}
		if (ipath_cmp24(psn, e->psn) >= 0) {
			if (prev == qp->s_tail_ack_queue)
				old_req = 0;
			break;
		}
	}
	switch (opcode) {
	case OP(RDMA_READ_REQUEST): {
		struct ib_reth *reth;
		u32 offset;
		u32 len;

		/*
		 * If we didn't find the RDMA read request in the ack queue,
		 * or the send tasklet is already backed up to send an
		 * earlier entry, we can ignore this request.
		 */
		if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req)
			goto unlock_done;
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		/*
		 * Address range must be a subset of the original
		 * request and start on pmtu boundaries.
		 * We reuse the old ack_queue slot since the requester
		 * should not back up and request an earlier PSN for the
		 * same request.
		 */
		offset = ((psn - e->psn) & IPATH_PSN_MASK) *
			ib_mtu_enum_to_int(qp->path_mtu);
		len = be32_to_cpu(reth->length);
		if (unlikely(offset + len > e->rdma_sge.sge.sge_length))
			goto unlock_done;
		if (len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			ok = ipath_rkey_ok(qp, &e->rdma_sge,
					   len, vaddr, rkey,
					   IB_ACCESS_REMOTE_READ);
			if (unlikely(!ok))
				goto unlock_done;
		} else {
			e->rdma_sge.sg_list = NULL;
			e->rdma_sge.num_sge = 0;
			e->rdma_sge.sge.mr = NULL;
			e->rdma_sge.sge.vaddr = NULL;
			e->rdma_sge.sge.length = 0;
			e->rdma_sge.sge.sge_length = 0;
		}
		e->psn = psn;
		qp->s_ack_state = OP(ACKNOWLEDGE);
		qp->s_tail_ack_queue = prev;
		break;
	}

	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD): {
		/*
		 * If we didn't find the atomic request in the ack queue
		 * or the send tasklet is already backed up to send an
		 * earlier entry, we can ignore this request.
		 */
		if (!e || e->opcode != (u8) opcode || old_req)
			goto unlock_done;
		qp->s_ack_state = OP(ACKNOWLEDGE);
		qp->s_tail_ack_queue = prev;
		break;
	}

	default:
		if (old_req)
			goto unlock_done;
		/*
		 * Resend the most recent ACK if this request is
		 * after all the previous RDMA reads and atomics.
		 */
		if (i == qp->r_head_ack_queue) {
			spin_unlock_irqrestore(&qp->s_lock, flags);
			qp->r_nak_state = 0;
			qp->r_ack_psn = qp->r_psn - 1;
			goto send_ack;
		}
		/*
		 * Try to send a simple ACK to work around a Mellanox bug
		 * which doesn't accept a RDMA read response or atomic
		 * response as an ACK for earlier SENDs or RDMA writes.
		 */
		if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
		    !(qp->s_flags & IPATH_S_ACK_PENDING) &&
		    qp->s_ack_state == OP(ACKNOWLEDGE)) {
			spin_unlock_irqrestore(&qp->s_lock, flags);
			qp->r_nak_state = 0;
			qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
			goto send_ack;
		}
		/*
		 * Resend the RDMA read or atomic op which
		 * ACKs this duplicate request.
		 */
		qp->s_ack_state = OP(ACKNOWLEDGE);
		qp->s_tail_ack_queue = i;
		break;
	}
	qp->r_nak_state = 0;
	tasklet_hi_schedule(&qp->s_task);

unlock_done:
	spin_unlock_irqrestore(&qp->s_lock, flags);
done:
	return 1;

send_ack:
	return 0;
}
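
/*
 * Put the QP into the error state and, if the last WQE was reached,
 * generate an IB_EVENT_QP_LAST_WQE_REACHED event for the consumer.
 */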
static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
{
	unsigned long flags;
	int lastwqe;

	spin_lock_irqsave(&qp->s_lock, flags);
	qp->state = IB_QPS_ERR;
	lastwqe = ipath_error_qp(qp, err);
	spin_unlock_irqrestore(&qp->s_lock, flags);

	if (lastwqe) {
		struct ib_event ev;

		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
}
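
/*
 * Advance s_tail_ack_queue past entry @n (wrapping as needed) so that the
 * ack queue slot can be reused.
 */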
  1440. static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
  1441. {
  1442. unsigned long flags;
  1443. unsigned next;
  1444. next = n + 1;
  1445. if (next > IPATH_MAX_RDMA_ATOMIC)
  1446. next = 0;
  1447. spin_lock_irqsave(&qp->s_lock, flags);
  1448. if (n == qp->s_tail_ack_queue) {
  1449. qp->s_tail_ack_queue = next;
  1450. qp->s_ack_state = OP(ACKNOWLEDGE);
  1451. }
  1452. spin_unlock_irqrestore(&qp->s_lock, flags);
  1453. }
/**
 * ipath_rc_rcv - process an incoming RC packet
 * @dev: the device this packet came in on
 * @hdr: the header of this packet
 * @has_grh: true if the header has a GRH
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP for this packet
 *
 * This is called from ipath_qp_rcv() to process an incoming RC packet
 * for the given QP.
 * Called at interrupt level.
 */
void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
	struct ipath_other_headers *ohdr;
	u32 opcode;
	u32 hdrsize;
	u32 psn;
	u32 pad;
	struct ib_wc wc;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	int diff;
	struct ib_reth *reth;
	int header_in_data;

	/* Validate the SLID. See Ch. 9.6.1.5 */
	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
		goto done;

	/* Check for GRH */
	if (!has_grh) {
		ohdr = &hdr->u.oth;
		hdrsize = 8 + 12;	/* LRH + BTH */
		psn = be32_to_cpu(ohdr->bth[2]);
		header_in_data = 0;
	} else {
		ohdr = &hdr->u.l.oth;
		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
		/*
		 * The header with GRH is 60 bytes and the core driver sets
		 * the eager header buffer size to 56 bytes so the last 4
		 * bytes of the BTH header (PSN) are in the data buffer.
		 */
		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
		if (header_in_data) {
			psn = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		} else
			psn = be32_to_cpu(ohdr->bth[2]);
	}

	/*
	 * Process responses (ACKs) before anything else.  Note that the
	 * packet sequence number will be for something in the send work
	 * queue rather than the expected receive packet sequence number.
	 * In other words, this QP is the requester.
	 */
	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
		ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
				  hdrsize, pmtu, header_in_data);
		goto done;
	}

	/* Compute 24 bits worth of difference. */
	diff = ipath_cmp24(psn, qp->r_psn);
	if (unlikely(diff)) {
		if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
				       psn, diff, header_in_data))
			goto done;
		goto send_ack;
	}

	/* Check for opcode sequence errors. */
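	/*
	 * qp->r_state holds the opcode of the previous request, so a
	 * MIDDLE or LAST packet is only legal if it continues a message
	 * of the same type (SEND or RDMA WRITE) started by an earlier
	 * FIRST/MIDDLE packet.
	 */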
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
	nack_inv:
		ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
		qp->r_nak_state = IB_NAK_INVALID_REQUEST;
		qp->r_ack_psn = qp->r_psn;
		goto send_ack;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto nack_inv;

	default:
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			goto nack_inv;
		/*
		 * Note that it is up to the requester to not send a new
		 * RDMA read or atomic operation before receiving an ACK
		 * for the previous operation.
		 */
		break;
	}

	wc.imm_data = 0;
	wc.wc_flags = 0;

	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
		if (!ipath_get_rwqe(qp, 0)) {
		rnr_nak:
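			/*
			 * No receive WQE is available: ask the requester to
			 * back off by returning an RNR NAK whose low bits
			 * carry this QP's minimum RNR timer value.
			 */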
			qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
			qp->r_ack_psn = qp->r_psn;
			goto send_ack;
		}
		qp->r_rcv_len = 0;
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
	case OP(RDMA_WRITE_MIDDLE):
	send_middle:
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto nack_inv;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto nack_inv;
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;

	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
		/* consume RWQE */
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		goto send_last_imm;

	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
		if (!ipath_get_rwqe(qp, 0))
			goto rnr_nak;
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto send_last;
		/* FALLTHROUGH */
	case OP(SEND_LAST_WITH_IMMEDIATE):
	send_last_imm:
		if (header_in_data) {
			wc.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;
		wc.wc_flags = IB_WC_WITH_IMM;
		/* FALLTHROUGH */
	case OP(SEND_LAST):
	case OP(RDMA_WRITE_LAST):
	send_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4)))
			goto nack_inv;
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
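		/*
		 * The total message length is this packet's payload plus
		 * whatever earlier FIRST/MIDDLE packets accumulated in
		 * r_rcv_len; it must not exceed the posted receive length.
		 */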
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len))
			goto nack_inv;
		ipath_copy_sge(&qp->r_sge, data, tlen);
		qp->r_msn++;
		if (!qp->r_wrid_valid)
			break;
		qp->r_wrid_valid = 0;
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.opcode = IB_WC_RECV;
		wc.vendor_err = 0;
		wc.qp = &qp->ibqp;
		wc.src_qp = qp->remote_qpn;
		wc.pkey_index = 0;
		wc.slid = qp->remote_ah_attr.dlid;
		wc.sl = qp->remote_ah_attr.sl;
		wc.dlid_path_bits = 0;
		wc.port_num = 0;
		/* Signal completion event if the solicited bit is set. */
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
			       (ohdr->bth[0] &
				__constant_cpu_to_be32(1 << 23)) != 0);
		break;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE)))
			goto nack_inv;
		/* consume RWQE */
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		hdrsize += sizeof(*reth);
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey & NAK */
			ok = ipath_rkey_ok(qp, &qp->r_sge,
					   qp->r_len, vaddr, rkey,
					   IB_ACCESS_REMOTE_WRITE);
			if (unlikely(!ok))
				goto nack_acc;
		} else {
			qp->r_sge.sg_list = NULL;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		if (opcode == OP(RDMA_WRITE_FIRST))
			goto send_middle;
		else if (opcode == OP(RDMA_WRITE_ONLY))
			goto send_last;
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		goto send_last_imm;

	case OP(RDMA_READ_REQUEST): {
		struct ipath_ack_entry *e;
		u32 len;
		u8 next;

		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_READ)))
			goto nack_inv;
		next = qp->r_head_ack_queue + 1;
		if (next > IPATH_MAX_RDMA_ATOMIC)
			next = 0;
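		/*
		 * If advancing the head would hit the tail, the ack queue
		 * is full: recycle the tail slot if its response has
		 * already been sent, otherwise NAK the request as invalid
		 * since the requester exceeded the number of outstanding
		 * RDMA reads/atomics this QP advertised.
		 */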
		if (unlikely(next == qp->s_tail_ack_queue)) {
			if (!qp->s_ack_queue[next].sent)
				goto nack_inv;
			ipath_update_ack_queue(qp, next);
		}
		e = &qp->s_ack_queue[qp->r_head_ack_queue];
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		len = be32_to_cpu(reth->length);
		if (len) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey & NAK */
			ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
					   rkey, IB_ACCESS_REMOTE_READ);
			if (unlikely(!ok))
				goto nack_acc;
			/*
			 * Update the next expected PSN.  We add 1 later
			 * below, so only add the remainder here.
			 */
			if (len > pmtu)
				qp->r_psn += (len - 1) / pmtu;
		} else {
			e->rdma_sge.sg_list = NULL;
			e->rdma_sge.num_sge = 0;
			e->rdma_sge.sge.mr = NULL;
			e->rdma_sge.sge.vaddr = NULL;
			e->rdma_sge.sge.length = 0;
			e->rdma_sge.sge.sge_length = 0;
		}
		e->opcode = opcode;
		e->sent = 0;
		e->psn = psn;
		/*
		 * We need to increment the MSN here instead of when we
		 * finish sending the result since a duplicate request would
		 * increment it more than once.
		 */
		qp->r_msn++;
		qp->r_psn++;
		qp->r_state = opcode;
		qp->r_nak_state = 0;
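		/*
		 * Make sure the new ack queue entry is fully written
		 * before r_head_ack_queue is advanced, since the send
		 * tasklet reads entries up to the head index.
		 */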
		barrier();
		qp->r_head_ack_queue = next;

		/* Call ipath_do_rc_send() in another thread. */
		tasklet_hi_schedule(&qp->s_task);

		goto done;
	}

	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD): {
		struct ib_atomic_eth *ateth;
		struct ipath_ack_entry *e;
		u64 vaddr;
		atomic64_t *maddr;
		u64 sdata;
		u32 rkey;
		u8 next;

		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_ATOMIC)))
			goto nack_inv;
		next = qp->r_head_ack_queue + 1;
		if (next > IPATH_MAX_RDMA_ATOMIC)
			next = 0;
		if (unlikely(next == qp->s_tail_ack_queue)) {
			if (!qp->s_ack_queue[next].sent)
				goto nack_inv;
			ipath_update_ack_queue(qp, next);
		}
		if (!header_in_data)
			ateth = &ohdr->u.atomic_eth;
		else
			ateth = (struct ib_atomic_eth *)data;
		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
			be32_to_cpu(ateth->vaddr[1]);
		if (unlikely(vaddr & (sizeof(u64) - 1)))
			goto nack_inv;
		rkey = be32_to_cpu(ateth->rkey);
		/* Check rkey & NAK */
		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
					    sizeof(u64), vaddr, rkey,
					    IB_ACCESS_REMOTE_ATOMIC)))
			goto nack_acc;
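		/*
		 * Both atomics return the original contents of the target
		 * quadword: FETCH_ADD subtracts the addend from the value
		 * returned by atomic64_add_return(), and COMPARE_SWAP uses
		 * the old value returned by cmpxchg().  The result is kept
		 * in the ack entry until the atomic ACK is sent.
		 */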
		/* Perform atomic OP and save result. */
		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
		sdata = be64_to_cpu(ateth->swap_data);
		e = &qp->s_ack_queue[qp->r_head_ack_queue];
		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
			(u64) atomic64_add_return(sdata, maddr) - sdata :
			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
				      be64_to_cpu(ateth->compare_data),
				      sdata);
		e->opcode = opcode;
		e->sent = 0;
		e->psn = psn & IPATH_PSN_MASK;
		qp->r_msn++;
		qp->r_psn++;
		qp->r_state = opcode;
		qp->r_nak_state = 0;
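		/* As above, publish the entry before advancing the head. */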
		barrier();
		qp->r_head_ack_queue = next;

		/* Call ipath_do_rc_send() in another thread. */
		tasklet_hi_schedule(&qp->s_task);

		goto done;
	}

	default:
		/* NAK unknown opcodes. */
		goto nack_inv;
	}
	qp->r_psn++;
	qp->r_state = opcode;
	qp->r_ack_psn = psn;
	qp->r_nak_state = 0;
	/* Send an ACK if requested or required. */
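	/* Bit 31 of the BTH PSN word is the AckReq bit set by the requester. */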
	if (psn & (1 << 31))
		goto send_ack;
	goto done;

nack_acc:
	ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
	qp->r_ack_psn = qp->r_psn;
send_ack:
	send_rc_ack(qp);

done:
	return;
}