aoecmd.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639
  1. /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
  2. /*
  3. * aoecmd.c
  4. * Filesystem request handling methods
  5. */
  6. #include <linux/hdreg.h>
  7. #include <linux/blkdev.h>
  8. #include <linux/skbuff.h>
  9. #include <linux/netdevice.h>
  10. #include "aoe.h"
  11. #define TIMERTICK (HZ / 10)
  12. #define MINTIMER (2 * TIMERTICK)
  13. #define MAXTIMER (HZ << 1)
  14. #define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
  15. static struct sk_buff *
  16. new_skb(struct net_device *if_dev, ulong len)
  17. {
  18. struct sk_buff *skb;
  19. skb = alloc_skb(len, GFP_ATOMIC);
  20. if (skb) {
  21. skb->nh.raw = skb->mac.raw = skb->data;
  22. skb->dev = if_dev;
  23. skb->protocol = __constant_htons(ETH_P_AOE);
  24. skb->priority = 0;
  25. skb_put(skb, len);
  26. skb->next = skb->prev = NULL;
  27. /* tell the network layer not to perform IP checksums
  28. * or to get the NIC to do it
  29. */
  30. skb->ip_summed = CHECKSUM_NONE;
  31. }
  32. return skb;
  33. }
  34. static struct sk_buff *
  35. skb_prepare(struct aoedev *d, struct frame *f)
  36. {
  37. struct sk_buff *skb;
  38. char *p;
  39. skb = new_skb(d->ifp, f->ndata + f->writedatalen);
  40. if (!skb) {
  41. printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
  42. return NULL;
  43. }
  44. p = skb->mac.raw;
  45. memcpy(p, f->data, f->ndata);
  46. if (f->writedatalen) {
  47. p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
  48. memcpy(p, f->bufaddr, f->writedatalen);
  49. }
  50. return skb;
  51. }
  52. static struct frame *
  53. getframe(struct aoedev *d, int tag)
  54. {
  55. struct frame *f, *e;
  56. f = d->frames;
  57. e = f + d->nframes;
  58. for (; f<e; f++)
  59. if (f->tag == tag)
  60. return f;
  61. return NULL;
  62. }
  63. /*
  64. * Leave the top bit clear so we have tagspace for userland.
  65. * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
  66. * This driver reserves tag -1 to mean "unused frame."
  67. */
  68. static int
  69. newtag(struct aoedev *d)
  70. {
  71. register ulong n;
  72. n = jiffies & 0xffff;
  73. return n |= (++d->lasttag & 0x7fff) << 16;
  74. }
  75. static int
  76. aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
  77. {
  78. u32 host_tag = newtag(d);
  79. memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
  80. memcpy(h->dst, d->addr, sizeof h->dst);
  81. h->type = __constant_cpu_to_be16(ETH_P_AOE);
  82. h->verfl = AOE_HVER;
  83. h->major = cpu_to_be16(d->aoemajor);
  84. h->minor = d->aoeminor;
  85. h->cmd = AOECMD_ATA;
  86. h->tag = cpu_to_be32(host_tag);
  87. return host_tag;
  88. }
  89. static void
  90. aoecmd_ata_rw(struct aoedev *d, struct frame *f)
  91. {
  92. struct aoe_hdr *h;
  93. struct aoe_atahdr *ah;
  94. struct buf *buf;
  95. struct sk_buff *skb;
  96. ulong bcnt;
  97. register sector_t sector;
  98. char writebit, extbit;
  99. writebit = 0x10;
  100. extbit = 0x4;
  101. buf = d->inprocess;
  102. sector = buf->sector;
  103. bcnt = buf->bv_resid;
  104. if (bcnt > MAXATADATA)
  105. bcnt = MAXATADATA;
  106. /* initialize the headers & frame */
  107. h = (struct aoe_hdr *) f->data;
  108. ah = (struct aoe_atahdr *) (h+1);
  109. f->ndata = sizeof *h + sizeof *ah;
  110. memset(h, 0, f->ndata);
  111. f->tag = aoehdr_atainit(d, h);
  112. f->waited = 0;
  113. f->buf = buf;
  114. f->bufaddr = buf->bufaddr;
  115. /* set up ata header */
  116. ah->scnt = bcnt >> 9;
  117. ah->lba0 = sector;
  118. ah->lba1 = sector >>= 8;
  119. ah->lba2 = sector >>= 8;
  120. ah->lba3 = sector >>= 8;
  121. if (d->flags & DEVFL_EXT) {
  122. ah->aflags |= AOEAFL_EXT;
  123. ah->lba4 = sector >>= 8;
  124. ah->lba5 = sector >>= 8;
  125. } else {
  126. extbit = 0;
  127. ah->lba3 &= 0x0f;
  128. ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
  129. }
  130. if (bio_data_dir(buf->bio) == WRITE) {
  131. ah->aflags |= AOEAFL_WRITE;
  132. f->writedatalen = bcnt;
  133. } else {
  134. writebit = 0;
  135. f->writedatalen = 0;
  136. }
  137. ah->cmdstat = WIN_READ | writebit | extbit;
  138. /* mark all tracking fields and load out */
  139. buf->nframesout += 1;
  140. buf->bufaddr += bcnt;
  141. buf->bv_resid -= bcnt;
  142. /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
  143. buf->resid -= bcnt;
  144. buf->sector += bcnt >> 9;
  145. if (buf->resid == 0) {
  146. d->inprocess = NULL;
  147. } else if (buf->bv_resid == 0) {
  148. buf->bv++;
  149. buf->bv_resid = buf->bv->bv_len;
  150. buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
  151. }
  152. skb = skb_prepare(d, f);
  153. if (skb) {
  154. skb->next = d->skblist;
  155. d->skblist = skb;
  156. }
  157. }
  158. /* enters with d->lock held */
  159. void
  160. aoecmd_work(struct aoedev *d)
  161. {
  162. struct frame *f;
  163. struct buf *buf;
  164. loop:
  165. f = getframe(d, FREETAG);
  166. if (f == NULL)
  167. return;
  168. if (d->inprocess == NULL) {
  169. if (list_empty(&d->bufq))
  170. return;
  171. buf = container_of(d->bufq.next, struct buf, bufs);
  172. list_del(d->bufq.next);
  173. /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
  174. d->inprocess = buf;
  175. }
  176. aoecmd_ata_rw(d, f);
  177. goto loop;
  178. }
  179. static void
  180. rexmit(struct aoedev *d, struct frame *f)
  181. {
  182. struct sk_buff *skb;
  183. struct aoe_hdr *h;
  184. char buf[128];
  185. u32 n;
  186. n = newtag(d);
  187. snprintf(buf, sizeof buf,
  188. "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
  189. "retransmit",
  190. d->aoemajor, d->aoeminor, f->tag, jiffies, n);
  191. aoechr_error(buf);
  192. h = (struct aoe_hdr *) f->data;
  193. f->tag = n;
  194. h->tag = cpu_to_be32(n);
  195. skb = skb_prepare(d, f);
  196. if (skb) {
  197. skb->next = d->skblist;
  198. d->skblist = skb;
  199. }
  200. }
  201. static int
  202. tsince(int tag)
  203. {
  204. int n;
  205. n = jiffies & 0xffff;
  206. n -= tag & 0xffff;
  207. if (n < 0)
  208. n += 1<<16;
  209. return n;
  210. }
  211. static void
  212. rexmit_timer(ulong vp)
  213. {
  214. struct aoedev *d;
  215. struct frame *f, *e;
  216. struct sk_buff *sl;
  217. register long timeout;
  218. ulong flags, n;
  219. d = (struct aoedev *) vp;
  220. sl = NULL;
  221. /* timeout is always ~150% of the moving average */
  222. timeout = d->rttavg;
  223. timeout += timeout >> 1;
  224. spin_lock_irqsave(&d->lock, flags);
  225. if (d->flags & DEVFL_TKILL) {
  226. tdie: spin_unlock_irqrestore(&d->lock, flags);
  227. return;
  228. }
  229. f = d->frames;
  230. e = f + d->nframes;
  231. for (; f<e; f++) {
  232. if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
  233. n = f->waited += timeout;
  234. n /= HZ;
  235. if (n > MAXWAIT) { /* waited too long. device failure. */
  236. aoedev_downdev(d);
  237. goto tdie;
  238. }
  239. rexmit(d, f);
  240. }
  241. }
  242. sl = d->skblist;
  243. d->skblist = NULL;
  244. if (sl) {
  245. n = d->rttavg <<= 1;
  246. if (n > MAXTIMER)
  247. d->rttavg = MAXTIMER;
  248. }
  249. d->timer.expires = jiffies + TIMERTICK;
  250. add_timer(&d->timer);
  251. spin_unlock_irqrestore(&d->lock, flags);
  252. aoenet_xmit(sl);
  253. }
  254. static void
  255. ataid_complete(struct aoedev *d, unsigned char *id)
  256. {
  257. u64 ssize;
  258. u16 n;
  259. /* word 83: command set supported */
  260. n = le16_to_cpup((__le16 *) &id[83<<1]);
  261. /* word 86: command set/feature enabled */
  262. n |= le16_to_cpup((__le16 *) &id[86<<1]);
  263. if (n & (1<<10)) { /* bit 10: LBA 48 */
  264. d->flags |= DEVFL_EXT;
  265. /* word 100: number lba48 sectors */
  266. ssize = le64_to_cpup((__le64 *) &id[100<<1]);
  267. /* set as in ide-disk.c:init_idedisk_capacity */
  268. d->geo.cylinders = ssize;
  269. d->geo.cylinders /= (255 * 63);
  270. d->geo.heads = 255;
  271. d->geo.sectors = 63;
  272. } else {
  273. d->flags &= ~DEVFL_EXT;
  274. /* number lba28 sectors */
  275. ssize = le32_to_cpup((__le32 *) &id[60<<1]);
  276. /* NOTE: obsolete in ATA 6 */
  277. d->geo.cylinders = le16_to_cpup((__le16 *) &id[54<<1]);
  278. d->geo.heads = le16_to_cpup((__le16 *) &id[55<<1]);
  279. d->geo.sectors = le16_to_cpup((__le16 *) &id[56<<1]);
  280. }
  281. d->ssize = ssize;
  282. d->geo.start = 0;
  283. if (d->gd != NULL) {
  284. d->gd->capacity = ssize;
  285. d->flags |= DEVFL_UP;
  286. return;
  287. }
  288. if (d->flags & DEVFL_WORKON) {
  289. printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
  290. "(This really shouldn't happen).\n");
  291. return;
  292. }
  293. INIT_WORK(&d->work, aoeblk_gdalloc, d);
  294. schedule_work(&d->work);
  295. d->flags |= DEVFL_WORKON;
  296. }
  297. static void
  298. calc_rttavg(struct aoedev *d, int rtt)
  299. {
  300. register long n;
  301. n = rtt;
  302. if (n < MINTIMER)
  303. n = MINTIMER;
  304. else if (n > MAXTIMER)
  305. n = MAXTIMER;
  306. /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
  307. n -= d->rttavg;
  308. d->rttavg += n >> 2;
  309. }
  310. void
  311. aoecmd_ata_rsp(struct sk_buff *skb)
  312. {
  313. struct aoedev *d;
  314. struct aoe_hdr *hin;
  315. struct aoe_atahdr *ahin, *ahout;
  316. struct frame *f;
  317. struct buf *buf;
  318. struct sk_buff *sl;
  319. register long n;
  320. ulong flags;
  321. char ebuf[128];
  322. u16 aoemajor;
  323. hin = (struct aoe_hdr *) skb->mac.raw;
  324. aoemajor = be16_to_cpu(hin->major);
  325. d = aoedev_by_aoeaddr(aoemajor, hin->minor);
  326. if (d == NULL) {
  327. snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
  328. "for unknown device %d.%d\n",
  329. aoemajor, hin->minor);
  330. aoechr_error(ebuf);
  331. return;
  332. }
  333. spin_lock_irqsave(&d->lock, flags);
  334. f = getframe(d, be32_to_cpu(hin->tag));
  335. if (f == NULL) {
  336. spin_unlock_irqrestore(&d->lock, flags);
  337. snprintf(ebuf, sizeof ebuf,
  338. "%15s e%d.%d tag=%08x@%08lx\n",
  339. "unexpected rsp",
  340. be16_to_cpu(hin->major),
  341. hin->minor,
  342. be32_to_cpu(hin->tag),
  343. jiffies);
  344. aoechr_error(ebuf);
  345. return;
  346. }
  347. calc_rttavg(d, tsince(f->tag));
  348. ahin = (struct aoe_atahdr *) (hin+1);
  349. ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
  350. buf = f->buf;
  351. if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
  352. printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
  353. "stat=%2.2Xh from e%ld.%ld\n",
  354. ahout->cmdstat, ahin->cmdstat,
  355. d->aoemajor, d->aoeminor);
  356. if (buf)
  357. buf->flags |= BUFFL_FAIL;
  358. } else {
  359. switch (ahout->cmdstat) {
  360. case WIN_READ:
  361. case WIN_READ_EXT:
  362. n = ahout->scnt << 9;
  363. if (skb->len - sizeof *hin - sizeof *ahin < n) {
  364. printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
  365. "ata data size in read. skb->len=%d\n",
  366. skb->len);
  367. /* fail frame f? just returning will rexmit. */
  368. spin_unlock_irqrestore(&d->lock, flags);
  369. return;
  370. }
  371. memcpy(f->bufaddr, ahin+1, n);
  372. case WIN_WRITE:
  373. case WIN_WRITE_EXT:
  374. break;
  375. case WIN_IDENTIFY:
  376. if (skb->len - sizeof *hin - sizeof *ahin < 512) {
  377. printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
  378. "in ataid. skb->len=%d\n", skb->len);
  379. spin_unlock_irqrestore(&d->lock, flags);
  380. return;
  381. }
  382. ataid_complete(d, (char *) (ahin+1));
  383. /* d->flags |= DEVFL_WC_UPDATE; */
  384. break;
  385. default:
  386. printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
  387. "outbound ata command %2.2Xh for %d.%d\n",
  388. ahout->cmdstat,
  389. be16_to_cpu(hin->major),
  390. hin->minor);
  391. }
  392. }
  393. if (buf) {
  394. buf->nframesout -= 1;
  395. if (buf->nframesout == 0 && buf->resid == 0) {
  396. unsigned long duration = jiffies - buf->start_time;
  397. unsigned long n_sect = buf->bio->bi_size >> 9;
  398. struct gendisk *disk = d->gd;
  399. if (bio_data_dir(buf->bio) == WRITE) {
  400. disk_stat_inc(disk, writes);
  401. disk_stat_add(disk, write_ticks, duration);
  402. disk_stat_add(disk, write_sectors, n_sect);
  403. } else {
  404. disk_stat_inc(disk, reads);
  405. disk_stat_add(disk, read_ticks, duration);
  406. disk_stat_add(disk, read_sectors, n_sect);
  407. }
  408. disk_stat_add(disk, io_ticks, duration);
  409. n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
  410. bio_endio(buf->bio, buf->bio->bi_size, n);
  411. mempool_free(buf, d->bufpool);
  412. }
  413. }
  414. f->buf = NULL;
  415. f->tag = FREETAG;
  416. aoecmd_work(d);
  417. sl = d->skblist;
  418. d->skblist = NULL;
  419. spin_unlock_irqrestore(&d->lock, flags);
  420. aoenet_xmit(sl);
  421. }
  422. void
  423. aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
  424. {
  425. struct aoe_hdr *h;
  426. struct aoe_cfghdr *ch;
  427. struct sk_buff *skb, *sl;
  428. struct net_device *ifp;
  429. sl = NULL;
  430. read_lock(&dev_base_lock);
  431. for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
  432. dev_hold(ifp);
  433. if (!is_aoe_netif(ifp))
  434. continue;
  435. skb = new_skb(ifp, sizeof *h + sizeof *ch);
  436. if (skb == NULL) {
  437. printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
  438. continue;
  439. }
  440. h = (struct aoe_hdr *) skb->mac.raw;
  441. memset(h, 0, sizeof *h + sizeof *ch);
  442. memset(h->dst, 0xff, sizeof h->dst);
  443. memcpy(h->src, ifp->dev_addr, sizeof h->src);
  444. h->type = __constant_cpu_to_be16(ETH_P_AOE);
  445. h->verfl = AOE_HVER;
  446. h->major = cpu_to_be16(aoemajor);
  447. h->minor = aoeminor;
  448. h->cmd = AOECMD_CFG;
  449. skb->next = sl;
  450. sl = skb;
  451. }
  452. read_unlock(&dev_base_lock);
  453. aoenet_xmit(sl);
  454. }
  455. /*
  456. * Since we only call this in one place (and it only prepares one frame)
  457. * we just return the skb. Usually we'd chain it up to the d->skblist.
  458. */
  459. static struct sk_buff *
  460. aoecmd_ata_id(struct aoedev *d)
  461. {
  462. struct aoe_hdr *h;
  463. struct aoe_atahdr *ah;
  464. struct frame *f;
  465. struct sk_buff *skb;
  466. f = getframe(d, FREETAG);
  467. if (f == NULL) {
  468. printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
  469. "This shouldn't happen.\n");
  470. return NULL;
  471. }
  472. /* initialize the headers & frame */
  473. h = (struct aoe_hdr *) f->data;
  474. ah = (struct aoe_atahdr *) (h+1);
  475. f->ndata = sizeof *h + sizeof *ah;
  476. memset(h, 0, f->ndata);
  477. f->tag = aoehdr_atainit(d, h);
  478. f->waited = 0;
  479. f->writedatalen = 0;
  480. /* this message initializes the device, so we reset the rttavg */
  481. d->rttavg = MAXTIMER;
  482. /* set up ata header */
  483. ah->scnt = 1;
  484. ah->cmdstat = WIN_IDENTIFY;
  485. ah->lba3 = 0xa0;
  486. skb = skb_prepare(d, f);
  487. /* we now want to start the rexmit tracking */
  488. d->flags &= ~DEVFL_TKILL;
  489. d->timer.data = (ulong) d;
  490. d->timer.function = rexmit_timer;
  491. d->timer.expires = jiffies + TIMERTICK;
  492. add_timer(&d->timer);
  493. return skb;
  494. }
  495. void
  496. aoecmd_cfg_rsp(struct sk_buff *skb)
  497. {
  498. struct aoedev *d;
  499. struct aoe_hdr *h;
  500. struct aoe_cfghdr *ch;
  501. ulong flags, sysminor, aoemajor;
  502. u16 bufcnt;
  503. struct sk_buff *sl;
  504. enum { MAXFRAMES = 8 };
  505. h = (struct aoe_hdr *) skb->mac.raw;
  506. ch = (struct aoe_cfghdr *) (h+1);
  507. /*
  508. * Enough people have their dip switches set backwards to
  509. * warrant a loud message for this special case.
  510. */
  511. aoemajor = be16_to_cpu(h->major);
  512. if (aoemajor == 0xfff) {
  513. printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
  514. "address is all ones. Check shelf dip switches\n");
  515. return;
  516. }
  517. sysminor = SYSMINOR(aoemajor, h->minor);
  518. if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
  519. printk(KERN_INFO
  520. "aoe: e%ld.%d: minor number too large\n",
  521. aoemajor, (int) h->minor);
  522. return;
  523. }
  524. bufcnt = be16_to_cpu(ch->bufcnt);
  525. if (bufcnt > MAXFRAMES) /* keep it reasonable */
  526. bufcnt = MAXFRAMES;
  527. d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
  528. if (d == NULL) {
  529. printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
  530. return;
  531. }
  532. spin_lock_irqsave(&d->lock, flags);
  533. if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
  534. spin_unlock_irqrestore(&d->lock, flags);
  535. return;
  536. }
  537. d->fw_ver = be16_to_cpu(ch->fwver);
  538. /* we get here only if the device is new */
  539. sl = aoecmd_ata_id(d);
  540. spin_unlock_irqrestore(&d->lock, flags);
  541. aoenet_xmit(sl);
  542. }