aoecmd.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
  2. /*
  3. * aoecmd.c
  4. * Filesystem request handling methods
  5. */
  6. #include <linux/hdreg.h>
  7. #include <linux/blkdev.h>
  8. #include <linux/skbuff.h>
  9. #include <linux/netdevice.h>
  10. #include "aoe.h"
  11. #define TIMERTICK (HZ / 10)
  12. #define MINTIMER (2 * TIMERTICK)
  13. #define MAXTIMER (HZ << 1)
  14. #define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
  15. static struct sk_buff *
  16. new_skb(struct net_device *if_dev, ulong len)
  17. {
  18. struct sk_buff *skb;
  19. skb = alloc_skb(len, GFP_ATOMIC);
  20. if (skb) {
  21. skb->nh.raw = skb->mac.raw = skb->data;
  22. skb->dev = if_dev;
  23. skb->protocol = __constant_htons(ETH_P_AOE);
  24. skb->priority = 0;
  25. skb_put(skb, len);
  26. skb->next = skb->prev = NULL;
  27. /* tell the network layer not to perform IP checksums
  28. * or to get the NIC to do it
  29. */
  30. skb->ip_summed = CHECKSUM_NONE;
  31. }
  32. return skb;
  33. }
  34. static struct sk_buff *
  35. skb_prepare(struct aoedev *d, struct frame *f)
  36. {
  37. struct sk_buff *skb;
  38. char *p;
  39. skb = new_skb(d->ifp, f->ndata + f->writedatalen);
  40. if (!skb) {
  41. printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
  42. return NULL;
  43. }
  44. p = skb->mac.raw;
  45. memcpy(p, f->data, f->ndata);
  46. if (f->writedatalen) {
  47. p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
  48. memcpy(p, f->bufaddr, f->writedatalen);
  49. }
  50. return skb;
  51. }
  52. static struct frame *
  53. getframe(struct aoedev *d, int tag)
  54. {
  55. struct frame *f, *e;
  56. f = d->frames;
  57. e = f + d->nframes;
  58. for (; f<e; f++)
  59. if (f->tag == tag)
  60. return f;
  61. return NULL;
  62. }
  63. /*
  64. * Leave the top bit clear so we have tagspace for userland.
  65. * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
  66. * This driver reserves tag -1 to mean "unused frame."
  67. */
  68. static int
  69. newtag(struct aoedev *d)
  70. {
  71. register ulong n;
  72. n = jiffies & 0xffff;
  73. return n |= (++d->lasttag & 0x7fff) << 16;
  74. }
  75. static int
  76. aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
  77. {
  78. u16 type = __constant_cpu_to_be16(ETH_P_AOE);
  79. u16 aoemajor = __cpu_to_be16(d->aoemajor);
  80. u32 host_tag = newtag(d);
  81. u32 tag = __cpu_to_be32(host_tag);
  82. memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
  83. memcpy(h->dst, d->addr, sizeof h->dst);
  84. memcpy(h->type, &type, sizeof type);
  85. h->verfl = AOE_HVER;
  86. memcpy(h->major, &aoemajor, sizeof aoemajor);
  87. h->minor = d->aoeminor;
  88. h->cmd = AOECMD_ATA;
  89. memcpy(h->tag, &tag, sizeof tag);
  90. return host_tag;
  91. }
/* Build and queue one ATA read/write frame for the buf currently in
 * d->inprocess.  Consumes up to MAXATADATA bytes of the current
 * bio_vec, advances the buf's tracking fields (bufaddr, residuals,
 * sector), and chains the prepared skb onto d->skblist for later
 * transmission.  Caller must hold d->lock. */
static void
aoecmd_ata_rw(struct aoedev *d, struct frame *f)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct buf *buf;
	struct sk_buff *skb;
	ulong bcnt;
	register sector_t sector;
	char writebit, extbit;

	writebit = 0x10;	/* command modifier bit for writes */
	extbit = 0x4;		/* command modifier bit for LBA48 (ext) */

	buf = d->inprocess;

	sector = buf->sector;
	bcnt = buf->bv_resid;
	if (bcnt > MAXATADATA)
		bcnt = MAXATADATA;	/* one frame carries at most MAXATADATA bytes */

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->buf = buf;
	f->bufaddr = buf->bufaddr;

	/* set up ata header; lba bytes peeled off low-to-high via
	 * the chained right shifts of sector */
	ah->scnt = bcnt >> 9;	/* count of 512-byte sectors */
	ah->lba0 = sector;
	ah->lba1 = sector >>= 8;
	ah->lba2 = sector >>= 8;
	ah->lba3 = sector >>= 8;
	if (d->flags & DEVFL_EXT) {
		ah->aflags |= AOEAFL_EXT;
		ah->lba4 = sector >>= 8;
		ah->lba5 = sector >>= 8;
	} else {
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}

	if (bio_data_dir(buf->bio) == WRITE) {
		ah->aflags |= AOEAFL_WRITE;
		f->writedatalen = bcnt;	/* payload travels in the frame */
	} else {
		writebit = 0;
		f->writedatalen = 0;
	}

	/* WIN_READ with write/ext modifier bits ORed in — presumably
	 * selects among the read/write, 28/48-bit ATA opcodes; verify
	 * against the ATA opcode table */
	ah->cmdstat = WIN_READ | writebit | extbit;

	/* mark all tracking fields and load out */
	buf->nframesout += 1;
	buf->bufaddr += bcnt;
	buf->bv_resid -= bcnt;
/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
	buf->resid -= bcnt;
	buf->sector += bcnt >> 9;
	if (buf->resid == 0) {
		/* whole buf issued; let aoecmd_work pick the next one */
		d->inprocess = NULL;
	} else if (buf->bv_resid == 0) {
		/* current bio_vec exhausted; advance to the next segment */
		buf->bv++;
		buf->bv_resid = buf->bv->bv_len;
		buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
	}

	skb = skb_prepare(d, f);
	if (skb) {
		/* push onto the device's pending-transmit list */
		skb->next = d->skblist;
		d->skblist = skb;
	}
}
  161. /* enters with d->lock held */
  162. void
  163. aoecmd_work(struct aoedev *d)
  164. {
  165. struct frame *f;
  166. struct buf *buf;
  167. loop:
  168. f = getframe(d, FREETAG);
  169. if (f == NULL)
  170. return;
  171. if (d->inprocess == NULL) {
  172. if (list_empty(&d->bufq))
  173. return;
  174. buf = container_of(d->bufq.next, struct buf, bufs);
  175. list_del(d->bufq.next);
  176. /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
  177. d->inprocess = buf;
  178. }
  179. aoecmd_ata_rw(d, f);
  180. goto loop;
  181. }
  182. static void
  183. rexmit(struct aoedev *d, struct frame *f)
  184. {
  185. struct sk_buff *skb;
  186. struct aoe_hdr *h;
  187. char buf[128];
  188. u32 n;
  189. u32 net_tag;
  190. n = newtag(d);
  191. snprintf(buf, sizeof buf,
  192. "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
  193. "retransmit",
  194. d->aoemajor, d->aoeminor, f->tag, jiffies, n);
  195. aoechr_error(buf);
  196. h = (struct aoe_hdr *) f->data;
  197. f->tag = n;
  198. net_tag = __cpu_to_be32(n);
  199. memcpy(h->tag, &net_tag, sizeof net_tag);
  200. skb = skb_prepare(d, f);
  201. if (skb) {
  202. skb->next = d->skblist;
  203. d->skblist = skb;
  204. }
  205. }
  206. static int
  207. tsince(int tag)
  208. {
  209. int n;
  210. n = jiffies & 0xffff;
  211. n -= tag & 0xffff;
  212. if (n < 0)
  213. n += 1<<16;
  214. return n;
  215. }
/* Timer callback: scan d's frames for entries that have waited longer
 * than ~150% of the average round trip and retransmit them.  Fails the
 * whole device after MAXWAIT seconds of waiting on one frame.  Backs
 * off rttavg when anything was retransmitted, then re-arms itself
 * every TIMERTICK unless DEVFL_TKILL is set.  vp is the aoedev. */
static void
rexmit_timer(ulong vp)
{
	struct aoedev *d;
	struct frame *f, *e;
	struct sk_buff *sl;
	register long timeout;
	ulong flags, n;

	d = (struct aoedev *) vp;
	sl = NULL;

	/* timeout is always ~150% of the moving average */
	timeout = d->rttavg;
	timeout += timeout >> 1;

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & DEVFL_TKILL) {
		/* device is being torn down: drop the lock and do not re-arm */
tdie:		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++) {
		if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
			n = f->waited += timeout;	/* accumulated wait, in ticks */
			n /= HZ;			/* ...converted to seconds */
			if (n > MAXWAIT) { /* waited too long. device failure. */
				aoedev_downdev(d);
				goto tdie;
			}
			rexmit(d, f);
		}
	}

	/* take ownership of anything rexmit() queued */
	sl = d->skblist;
	d->skblist = NULL;
	if (sl) {
		/* retransmits happened: double rttavg (capped at MAXTIMER)
		 * so the next pass is less trigger-happy */
		n = d->rttavg <<= 1;
		if (n > MAXTIMER)
			d->rttavg = MAXTIMER;
	}

	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);

	/* transmit outside the lock */
	aoenet_xmit(sl);
}
/* Digest a 512-byte ATA IDENTIFY response: determine LBA28 vs LBA48
 * addressing, record the device size and CHS geometry in d, and either
 * mark an existing gendisk up or schedule gendisk allocation for a new
 * device.  id points at the raw little-endian identify data; words are
 * indexed as id[word<<1].  NOTE(review): the casts assume the buffer
 * is suitably aligned for u16/u32/u64 loads — confirm for this arch. */
static void
ataid_complete(struct aoedev *d, unsigned char *id)
{
	u64 ssize;
	u16 n;

	/* word 83: command set supported */
	n = __le16_to_cpu(*((u16 *) &id[83<<1]));

	/* word 86: command set/feature enabled */
	n |= __le16_to_cpu(*((u16 *) &id[86<<1]));

	if (n & (1<<10)) { /* bit 10: LBA 48 */
		d->flags |= DEVFL_EXT;

		/* word 100: number lba48 sectors */
		ssize = __le64_to_cpu(*((u64 *) &id[100<<1]));

		/* set as in ide-disk.c:init_idedisk_capacity */
		d->geo.cylinders = ssize;
		d->geo.cylinders /= (255 * 63);
		d->geo.heads = 255;
		d->geo.sectors = 63;
	} else {
		d->flags &= ~DEVFL_EXT;

		/* number lba28 sectors */
		ssize = __le32_to_cpu(*((u32 *) &id[60<<1]));

		/* NOTE: obsolete in ATA 6 */
		d->geo.cylinders = __le16_to_cpu(*((u16 *) &id[54<<1]));
		d->geo.heads = __le16_to_cpu(*((u16 *) &id[55<<1]));
		d->geo.sectors = __le16_to_cpu(*((u16 *) &id[56<<1]));
	}
	d->ssize = ssize;
	d->geo.start = 0;
	if (d->gd != NULL) {
		/* gendisk already exists; just refresh capacity and go up */
		d->gd->capacity = ssize;
		d->flags |= DEVFL_UP;
		return;
	}
	if (d->flags & DEVFL_WORKON) {
		printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
			"(This really shouldn't happen).\n");
		return;
	}
	/* new device: hand gendisk allocation to process context */
	INIT_WORK(&d->work, aoeblk_gdalloc, d);
	schedule_work(&d->work);
	d->flags |= DEVFL_WORKON;
}
  302. static void
  303. calc_rttavg(struct aoedev *d, int rtt)
  304. {
  305. register long n;
  306. n = rtt;
  307. if (n < MINTIMER)
  308. n = MINTIMER;
  309. else if (n > MAXTIMER)
  310. n = MAXTIMER;
  311. /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
  312. n -= d->rttavg;
  313. d->rttavg += n >> 2;
  314. }
/* Handle an incoming ATA response frame: match it to its outstanding
 * frame by tag, update the RTT average, copy read data into the bio
 * buffer (or record errors), complete the bio when its last frame
 * arrives, free the frame, and restart I/O issue.  Takes and releases
 * d->lock; queued skbs are transmitted after the lock is dropped. */
void
aoecmd_ata_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *hin;
	struct aoe_atahdr *ahin, *ahout;
	struct frame *f;
	struct buf *buf;
	struct sk_buff *sl;
	register long n;
	ulong flags;
	char ebuf[128];

	hin = (struct aoe_hdr *) skb->mac.raw;
	d = aoedev_bymac(hin->src);
	if (d == NULL) {
		/* response from a device we never talked to */
		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
			"for unknown device %d.%d\n",
			__be16_to_cpu(*((u16 *) hin->major)),
			hin->minor);
		aoechr_error(ebuf);
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag)));
	if (f == NULL) {
		/* stale or duplicate response; log and ignore */
		spin_unlock_irqrestore(&d->lock, flags);
		snprintf(ebuf, sizeof ebuf,
			"%15s e%d.%d tag=%08x@%08lx\n",
			"unexpected rsp",
			__be16_to_cpu(*((u16 *) hin->major)),
			hin->minor,
			__be32_to_cpu(*((u32 *) hin->tag)),
			jiffies);
		aoechr_error(ebuf);
		return;
	}

	calc_rttavg(d, tsince(f->tag));

	ahin = (struct aoe_atahdr *) (hin+1);
	ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
	buf = f->buf;

	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
		printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
			"stat=%2.2Xh from e%ld.%ld\n",
			ahout->cmdstat, ahin->cmdstat,
			d->aoemajor, d->aoeminor);
		if (buf)
			buf->flags |= BUFFL_FAIL;
	} else {
		/* dispatch on the command we sent, not the response */
		switch (ahout->cmdstat) {
		case WIN_READ:
		case WIN_READ_EXT:
			n = ahout->scnt << 9;	/* expected data bytes */
			if (skb->len - sizeof *hin - sizeof *ahin < n) {
				printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
					"ata data size in read. skb->len=%d\n",
					skb->len);
				/* fail frame f? just returning will rexmit. */
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			memcpy(f->bufaddr, ahin+1, n);
			/* fall through: reads share the write cases' break */
		case WIN_WRITE:
		case WIN_WRITE_EXT:
			break;
		case WIN_IDENTIFY:
			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
				printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
					"in ataid. skb->len=%d\n", skb->len);
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			ataid_complete(d, (char *) (ahin+1));
			/* d->flags |= DEVFL_WC_UPDATE; */
			break;
		default:
			printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
				"outbound ata command %2.2Xh for %d.%d\n",
				ahout->cmdstat,
				__be16_to_cpu(*((u16 *) hin->major)),
				hin->minor);
		}
	}

	if (buf) {
		buf->nframesout -= 1;
		if (buf->nframesout == 0 && buf->resid == 0) {
			/* last frame of this bio is in; complete it */
			n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
			bio_endio(buf->bio, buf->bio->bi_size, n);
			mempool_free(buf, d->bufpool);
		}
	}

	/* release the frame and issue any queued work it frees up */
	f->buf = NULL;
	f->tag = FREETAG;

	aoecmd_work(d);

	sl = d->skblist;
	d->skblist = NULL;

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}
/* Broadcast an AoE config query for shelf aoemajor / slot aoeminor on
 * every network interface accepted by is_aoe_netif().  The built skbs
 * are chained and transmitted after dev_base_lock is released.
 * NOTE(review): dev_hold at loop top is balanced by the dev_put in the
 * for-increment, which runs before ifp->next is read — the list walk
 * itself is protected by dev_base_lock. */
void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct sk_buff *skb, *sl;
	struct net_device *ifp;
	u16 aoe_type = __constant_cpu_to_be16(ETH_P_AOE);
	u16 net_aoemajor = __cpu_to_be16(aoemajor);

	sl = NULL;

	read_lock(&dev_base_lock);
	for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
		dev_hold(ifp);
		if (!is_aoe_netif(ifp))
			continue;

		skb = new_skb(ifp, sizeof *h + sizeof *ch);
		if (skb == NULL) {
			printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
			continue;
		}
		h = (struct aoe_hdr *) skb->mac.raw;
		memset(h, 0, sizeof *h + sizeof *ch);

		/* broadcast destination; config query carries no payload */
		memset(h->dst, 0xff, sizeof h->dst);
		memcpy(h->src, ifp->dev_addr, sizeof h->src);
		memcpy(h->type, &aoe_type, sizeof aoe_type);
		h->verfl = AOE_HVER;
		memcpy(h->major, &net_aoemajor, sizeof net_aoemajor);
		h->minor = aoeminor;
		h->cmd = AOECMD_CFG;

		/* chain onto the local send list */
		skb->next = sl;
		sl = skb;
	}
	read_unlock(&dev_base_lock);

	aoenet_xmit(sl);
}
/*
 * Build an ATA IDENTIFY frame for device d and start the rexmit timer.
 * Since we only call this in one place (and it only prepares one frame)
 * we just return the skb. Usually we'd chain it up to the d->skblist.
 * Caller holds d->lock.  Returns NULL if no free frame is available.
 */
static struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct frame *f;
	struct sk_buff *skb;

	f = getframe(d, FREETAG);
	if (f == NULL) {
		printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
			"This shouldn't happen.\n");
		return NULL;
	}

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->writedatalen = 0;

	/* this message initializes the device, so we reset the rttavg */
	d->rttavg = MAXTIMER;

	/* set up ata header */
	ah->scnt = 1;
	ah->cmdstat = WIN_IDENTIFY;
	ah->lba3 = 0xa0;	/* obsolete bits, cf. the 0xa0 note in aoecmd_ata_rw */

	skb = skb_prepare(d, f);

	/* we now want to start the rexmit tracking */
	d->flags &= ~DEVFL_TKILL;
	d->timer.data = (ulong) d;
	d->timer.function = rexmit_timer;
	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	return skb;
}
/* Handle an incoming AoE config response: validate the shelf address
 * and derived minor number, create or look up the aoedev, and for a
 * device that is not yet up kick off an ATA IDENTIFY.  Takes and
 * releases d->lock; the identify skb is sent after the lock drops. */
void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	ulong flags, bufcnt, sysminor, aoemajor;
	struct sk_buff *sl;
	enum { MAXFRAMES = 8 };	/* cap on frames we allocate per device */

	h = (struct aoe_hdr *) skb->mac.raw;
	ch = (struct aoe_cfghdr *) (h+1);

	/*
	 * Enough people have their dip switches set backwards to
	 * warrant a loud message for this special case.
	 */
	aoemajor = __be16_to_cpu(*((u16 *) h->major));
	if (aoemajor == 0xfff) {
		printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
			"address is all ones. Check shelf dip switches\n");
		return;
	}

	sysminor = SYSMINOR(aoemajor, h->minor);
	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
		/* address maps past the block-minor space; refuse it */
		printk(KERN_INFO
			"aoe: e%ld.%d: minor number too large\n",
			aoemajor, (int) h->minor);
		return;
	}

	/* target tells us how many outstanding buffers it supports */
	bufcnt = __be16_to_cpu(*((u16 *) ch->bufcnt));
	if (bufcnt > MAXFRAMES)	/* keep it reasonable */
		bufcnt = MAXFRAMES;

	d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
	if (d == NULL) {
		printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	/* already up or closing: nothing more to do */
	if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}

	d->fw_ver = __be16_to_cpu(*((u16 *) ch->fwver));

	/* we get here only if the device is new */
	sl = aoecmd_ata_id(d);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}