/* Copyright (c) 2004 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoecmd.c
 * Filesystem request handling methods
 */

#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <asm/unaligned.h>
#include "aoe.h"

#define TIMERTICK (HZ / 10)
#define MINTIMER (2 * TIMERTICK)
#define MAXTIMER (HZ << 1)
#define MAXWAIT (60 * 3)	/* After MAXWAIT seconds, give up and fail dev */
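
/*
 * Note on the timing constants above: TIMERTICK is the retransmit timer
 * period (HZ/10, roughly 100 ms).  Round-trip samples are clamped to the
 * range [MINTIMER, MAXTIMER] in calc_rttavg() below, and the backoff in
 * rexmit_timer() never pushes the smoothed average above MAXTIMER.
 */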

static struct sk_buff *
new_skb(struct net_device *if_dev, ulong len)
{
        struct sk_buff *skb;

        skb = alloc_skb(len, GFP_ATOMIC);
        if (skb) {
                skb->nh.raw = skb->mac.raw = skb->data;
                skb->dev = if_dev;
                skb->protocol = __constant_htons(ETH_P_AOE);
                skb->priority = 0;
                skb_put(skb, len);
                skb->next = skb->prev = NULL;

                /* tell the network layer not to perform IP checksums
                 * or to get the NIC to do it
                 */
                skb->ip_summed = CHECKSUM_NONE;
        }
        return skb;
}

static struct sk_buff *
skb_prepare(struct aoedev *d, struct frame *f)
{
        struct sk_buff *skb;
        char *p;

        skb = new_skb(d->ifp, f->ndata + f->writedatalen);
        if (!skb) {
                printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
                return NULL;
        }

        p = skb->mac.raw;
        memcpy(p, f->data, f->ndata);

        if (f->writedatalen) {
                p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
                memcpy(p, f->bufaddr, f->writedatalen);
        }

        return skb;
}

static struct frame *
getframe(struct aoedev *d, int tag)
{
        struct frame *f, *e;

        f = d->frames;
        e = f + d->nframes;
        for (; f < e; f++)
                if (f->tag == tag)
                        return f;
        return NULL;
}

/*
 * Leave the top bit clear so we have tagspace for userland.
 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 * This driver reserves tag -1 to mean "unused frame."
 */
static int
newtag(struct aoedev *d)
{
        register ulong n;

        n = jiffies & 0xffff;
        return n |= (++d->lasttag & 0x7fff) << 16;
}
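
/*
 * To spell out the tag format built above: bit 31 stays zero (tagspace for
 * userland), bits 30..16 carry the low 15 bits of d->lasttag as a per-device
 * sequence number, and bits 15..0 carry the low 16 bits of jiffies at
 * transmit time, which tsince() later uses to measure how long a frame has
 * been outstanding.
 */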

static int
aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
{
        u32 host_tag = newtag(d);

        memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
        memcpy(h->dst, d->addr, sizeof h->dst);
        h->type = __constant_cpu_to_be16(ETH_P_AOE);
        h->verfl = AOE_HVER;
        h->major = cpu_to_be16(d->aoemajor);
        h->minor = d->aoeminor;
        h->cmd = AOECMD_ATA;
        h->tag = cpu_to_be32(host_tag);

        return host_tag;
}

static void
aoecmd_ata_rw(struct aoedev *d, struct frame *f)
{
        struct aoe_hdr *h;
        struct aoe_atahdr *ah;
        struct buf *buf;
        struct sk_buff *skb;
        ulong bcnt;
        register sector_t sector;
        char writebit, extbit;

        writebit = 0x10;
        extbit = 0x4;

        buf = d->inprocess;

        sector = buf->sector;
        bcnt = buf->bv_resid;
        if (bcnt > MAXATADATA)
                bcnt = MAXATADATA;

        /* initialize the headers & frame */
        h = (struct aoe_hdr *) f->data;
        ah = (struct aoe_atahdr *) (h+1);
        f->ndata = sizeof *h + sizeof *ah;
        memset(h, 0, f->ndata);
        f->tag = aoehdr_atainit(d, h);
        f->waited = 0;
        f->buf = buf;
        f->bufaddr = buf->bufaddr;

        /* set up ata header */
        ah->scnt = bcnt >> 9;
        ah->lba0 = sector;
        ah->lba1 = sector >>= 8;
        ah->lba2 = sector >>= 8;
        ah->lba3 = sector >>= 8;
        if (d->flags & DEVFL_EXT) {
                ah->aflags |= AOEAFL_EXT;
                ah->lba4 = sector >>= 8;
                ah->lba5 = sector >>= 8;
        } else {
                extbit = 0;
                ah->lba3 &= 0x0f;
                ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
        }
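
        /*
         * A note on the sector split above: the LBA is written out one byte
         * at a time, least significant byte first.  LBA48 targets
         * (DEVFL_EXT) get all six bytes plus the AOEAFL_EXT flag; LBA28
         * targets use only lba0..lba3, with the high nibble of lba3 set to
         * 0xe0, the LBA mode bit plus the obsolete 0xa0 device bits noted in
         * the comment.
         */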

        if (bio_data_dir(buf->bio) == WRITE) {
                ah->aflags |= AOEAFL_WRITE;
                f->writedatalen = bcnt;
        } else {
                writebit = 0;
                f->writedatalen = 0;
        }

        ah->cmdstat = WIN_READ | writebit | extbit;

        /* mark all tracking fields and load out */
        buf->nframesout += 1;
        buf->bufaddr += bcnt;
        buf->bv_resid -= bcnt;
        /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
        buf->resid -= bcnt;
        buf->sector += bcnt >> 9;
        if (buf->resid == 0) {
                d->inprocess = NULL;
        } else if (buf->bv_resid == 0) {
                buf->bv++;
                buf->bv_resid = buf->bv->bv_len;
                buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
        }

        skb = skb_prepare(d, f);
        if (skb) {
                skb->next = NULL;
                if (d->sendq_hd)
                        d->sendq_tl->next = skb;
                else
                        d->sendq_hd = skb;
                d->sendq_tl = skb;
        }
}

/* enters with d->lock held */
void
aoecmd_work(struct aoedev *d)
{
        struct frame *f;
        struct buf *buf;
loop:
        f = getframe(d, FREETAG);
        if (f == NULL)
                return;
        if (d->inprocess == NULL) {
                if (list_empty(&d->bufq))
                        return;
                buf = container_of(d->bufq.next, struct buf, bufs);
                list_del(d->bufq.next);
                /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
                d->inprocess = buf;
        }
        aoecmd_ata_rw(d, f);
        goto loop;
}
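
/*
 * Worth noting: aoecmd_work() and aoecmd_ata_rw() only chain prepared skbs
 * onto d->sendq_hd/d->sendq_tl while d->lock is held; callers such as
 * aoecmd_ata_rsp() below detach that list and hand it to aoenet_xmit() only
 * after dropping the lock.
 */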

static void
rexmit(struct aoedev *d, struct frame *f)
{
        struct sk_buff *skb;
        struct aoe_hdr *h;
        char buf[128];
        u32 n;

        n = newtag(d);

        snprintf(buf, sizeof buf,
                "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
                "retransmit",
                d->aoemajor, d->aoeminor, f->tag, jiffies, n);
        aoechr_error(buf);

        h = (struct aoe_hdr *) f->data;
        f->tag = n;
        h->tag = cpu_to_be32(n);

        skb = skb_prepare(d, f);
        if (skb) {
                skb->next = NULL;
                if (d->sendq_hd)
                        d->sendq_tl->next = skb;
                else
                        d->sendq_hd = skb;
                d->sendq_tl = skb;
        }
}

static int
tsince(int tag)
{
        int n;

        n = jiffies & 0xffff;
        n -= tag & 0xffff;
        if (n < 0)
                n += 1<<16;
        return n;
}
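
/*
 * For example, a frame stamped when the low 16 bits of jiffies read 0xfff0
 * and examined when they read 0x0010 gives n = 0x0010 - 0xfff0 = -0xffe0,
 * which the wraparound correction turns into 0x20, i.e. 32 ticks elapsed.
 */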

static void
rexmit_timer(ulong vp)
{
        struct aoedev *d;
        struct frame *f, *e;
        struct sk_buff *sl;
        register long timeout;
        ulong flags, n;

        d = (struct aoedev *) vp;
        sl = NULL;

        /* timeout is always ~150% of the moving average */
        timeout = d->rttavg;
        timeout += timeout >> 1;

        spin_lock_irqsave(&d->lock, flags);

        if (d->flags & DEVFL_TKILL) {
tdie:           spin_unlock_irqrestore(&d->lock, flags);
                return;
        }
        f = d->frames;
        e = f + d->nframes;
        for (; f < e; f++) {
                if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
                        n = f->waited += timeout;
                        n /= HZ;
                        if (n > MAXWAIT) { /* waited too long. device failure. */
                                aoedev_downdev(d);
                                goto tdie;
                        }
                        rexmit(d, f);
                }
        }
        sl = d->sendq_hd;
        d->sendq_hd = d->sendq_tl = NULL;
        if (sl) {
                n = d->rttavg <<= 1;
                if (n > MAXTIMER)
                        d->rttavg = MAXTIMER;
        }

        d->timer.expires = jiffies + TIMERTICK;
        add_timer(&d->timer);

        spin_unlock_irqrestore(&d->lock, flags);

        aoenet_xmit(sl);
}
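
/*
 * In other words, on every tick any frame outstanding for more than roughly
 * 1.5 * rttavg is retransmitted with a fresh tag, and if anything was
 * retransmitted the smoothed RTT is doubled (capped at MAXTIMER) as a crude
 * backoff.  A frame that has been retried for more than MAXWAIT seconds
 * takes the whole device down via aoedev_downdev().
 */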

static void
ataid_complete(struct aoedev *d, unsigned char *id)
{
        u64 ssize;
        u16 n;

        /* word 83: command set supported */
        n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));

        /* word 86: command set/feature enabled */
        n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));

        if (n & (1<<10)) {	/* bit 10: LBA 48 */
                d->flags |= DEVFL_EXT;

                /* word 100: number lba48 sectors */
                ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));

                /* set as in ide-disk.c:init_idedisk_capacity */
                d->geo.cylinders = ssize;
                d->geo.cylinders /= (255 * 63);
                d->geo.heads = 255;
                d->geo.sectors = 63;
        } else {
                d->flags &= ~DEVFL_EXT;

                /* number lba28 sectors */
                ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));

                /* NOTE: obsolete in ATA 6 */
                d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
                d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
                d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
        }
        d->ssize = ssize;
        d->geo.start = 0;
        if (d->gd != NULL) {
                d->gd->capacity = ssize;
                d->flags |= DEVFL_UP;
                return;
        }
        if (d->flags & DEVFL_WORKON) {
                printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
                        "(This really shouldn't happen).\n");
                return;
        }
        INIT_WORK(&d->work, aoeblk_gdalloc, d);
        schedule_work(&d->work);
        d->flags |= DEVFL_WORKON;
}
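
/*
 * The synthetic CHS geometry for LBA48 devices mirrors what ide-disk does:
 * 255 heads and 63 sectors per track, with cylinders = ssize / (255 * 63).
 * As an illustration, an identify reporting 234441648 sectors (a typical
 * 120 GB disk) works out to 14593 cylinders.
 */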

static void
calc_rttavg(struct aoedev *d, int rtt)
{
        register long n;

        n = rtt;
        if (n < MINTIMER)
                n = MINTIMER;
        else if (n > MAXTIMER)
                n = MAXTIMER;

        /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
        n -= d->rttavg;
        d->rttavg += n >> 2;
}
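
/*
 * Written out, the update above is the usual smoothed-RTT recurrence
 *
 *	rttavg' = rttavg + (rtt - rttavg) / 4
 *
 * i.e. an exponentially weighted moving average with gain g = 0.25, with
 * each sample first clamped to [MINTIMER, MAXTIMER].
 */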

void
aoecmd_ata_rsp(struct sk_buff *skb)
{
        struct aoedev *d;
        struct aoe_hdr *hin;
        struct aoe_atahdr *ahin, *ahout;
        struct frame *f;
        struct buf *buf;
        struct sk_buff *sl;
        register long n;
        ulong flags;
        char ebuf[128];
        u16 aoemajor;

        hin = (struct aoe_hdr *) skb->mac.raw;
        aoemajor = be16_to_cpu(hin->major);
        d = aoedev_by_aoeaddr(aoemajor, hin->minor);
        if (d == NULL) {
                snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
                        "for unknown device %d.%d\n",
                        aoemajor, hin->minor);
                aoechr_error(ebuf);
                return;
        }

        spin_lock_irqsave(&d->lock, flags);

        f = getframe(d, be32_to_cpu(hin->tag));
        if (f == NULL) {
                spin_unlock_irqrestore(&d->lock, flags);
                snprintf(ebuf, sizeof ebuf,
                        "%15s e%d.%d tag=%08x@%08lx\n",
                        "unexpected rsp",
                        be16_to_cpu(hin->major),
                        hin->minor,
                        be32_to_cpu(hin->tag),
                        jiffies);
                aoechr_error(ebuf);
                return;
        }

        calc_rttavg(d, tsince(f->tag));

        ahin = (struct aoe_atahdr *) (hin+1);
        ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
        buf = f->buf;
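
        /*
         * The 0xa9 mask below covers the ATA status/error bits that must be
         * clear on success: ERR (0x01), DRQ (0x08), DF (0x20) and BSY (0x80).
         */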
        if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
                printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
                        "stat=%2.2Xh from e%ld.%ld\n",
                        ahout->cmdstat, ahin->cmdstat,
                        d->aoemajor, d->aoeminor);
                if (buf)
                        buf->flags |= BUFFL_FAIL;
        } else {
                switch (ahout->cmdstat) {
                case WIN_READ:
                case WIN_READ_EXT:
                        n = ahout->scnt << 9;
                        if (skb->len - sizeof *hin - sizeof *ahin < n) {
                                printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
                                        "ata data size in read. skb->len=%d\n",
                                        skb->len);
                                /* fail frame f? just returning will rexmit. */
                                spin_unlock_irqrestore(&d->lock, flags);
                                return;
                        }
                        memcpy(f->bufaddr, ahin+1, n);
                        /* fall through: reads and writes share the cleanup below */
                case WIN_WRITE:
                case WIN_WRITE_EXT:
                        break;
                case WIN_IDENTIFY:
                        if (skb->len - sizeof *hin - sizeof *ahin < 512) {
                                printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
                                        "in ataid. skb->len=%d\n", skb->len);
                                spin_unlock_irqrestore(&d->lock, flags);
                                return;
                        }
                        ataid_complete(d, (char *) (ahin+1));
                        /* d->flags |= DEVFL_WC_UPDATE; */
                        break;
                default:
                        printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
                                "outbound ata command %2.2Xh for %d.%d\n",
                                ahout->cmdstat,
                                be16_to_cpu(hin->major),
                                hin->minor);
                }
        }

        if (buf) {
                buf->nframesout -= 1;
                if (buf->nframesout == 0 && buf->resid == 0) {
                        unsigned long duration = jiffies - buf->start_time;
                        unsigned long n_sect = buf->bio->bi_size >> 9;
                        struct gendisk *disk = d->gd;

                        if (bio_data_dir(buf->bio) == WRITE) {
                                disk_stat_inc(disk, writes);
                                disk_stat_add(disk, write_ticks, duration);
                                disk_stat_add(disk, write_sectors, n_sect);
                        } else {
                                disk_stat_inc(disk, reads);
                                disk_stat_add(disk, read_ticks, duration);
                                disk_stat_add(disk, read_sectors, n_sect);
                        }
                        disk_stat_add(disk, io_ticks, duration);
                        n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
                        bio_endio(buf->bio, buf->bio->bi_size, n);
                        mempool_free(buf, d->bufpool);
                }
        }

        f->buf = NULL;
        f->tag = FREETAG;

        aoecmd_work(d);

        sl = d->sendq_hd;
        d->sendq_hd = d->sendq_tl = NULL;

        spin_unlock_irqrestore(&d->lock, flags);

        aoenet_xmit(sl);
}

void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
        struct aoe_hdr *h;
        struct aoe_cfghdr *ch;
        struct sk_buff *skb, *sl;
        struct net_device *ifp;

        sl = NULL;

        read_lock(&dev_base_lock);
        for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
                dev_hold(ifp);
                if (!is_aoe_netif(ifp))
                        continue;

                skb = new_skb(ifp, sizeof *h + sizeof *ch);
                if (skb == NULL) {
                        printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
                        continue;
                }
                h = (struct aoe_hdr *) skb->mac.raw;
                memset(h, 0, sizeof *h + sizeof *ch);

                memset(h->dst, 0xff, sizeof h->dst);
                memcpy(h->src, ifp->dev_addr, sizeof h->src);
                h->type = __constant_cpu_to_be16(ETH_P_AOE);
                h->verfl = AOE_HVER;
                h->major = cpu_to_be16(aoemajor);
                h->minor = aoeminor;
                h->cmd = AOECMD_CFG;

                skb->next = sl;
                sl = skb;
        }
        read_unlock(&dev_base_lock);

        aoenet_xmit(sl);
}
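
/*
 * aoecmd_cfg() broadcasts an AoE config query (Ethernet destination
 * ff:ff:ff:ff:ff:ff) out of every interface that is_aoe_netif() accepts,
 * chaining the skbs into a list handed to aoenet_xmit(); responses come
 * back through aoecmd_cfg_rsp() at the end of this file.
 */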

/*
 * Since we only call this in one place (and it only prepares one frame)
 * we just return the skb.  Usually we'd chain it up to the aoedev sendq.
 */
static struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
        struct aoe_hdr *h;
        struct aoe_atahdr *ah;
        struct frame *f;
        struct sk_buff *skb;

        f = getframe(d, FREETAG);
        if (f == NULL) {
                printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
                        "This shouldn't happen.\n");
                return NULL;
        }

        /* initialize the headers & frame */
        h = (struct aoe_hdr *) f->data;
        ah = (struct aoe_atahdr *) (h+1);
        f->ndata = sizeof *h + sizeof *ah;
        memset(h, 0, f->ndata);
        f->tag = aoehdr_atainit(d, h);
        f->waited = 0;
        f->writedatalen = 0;

        /* this message initializes the device, so we reset the rttavg */
        d->rttavg = MAXTIMER;

        /* set up ata header */
        ah->scnt = 1;
        ah->cmdstat = WIN_IDENTIFY;
        ah->lba3 = 0xa0;

        skb = skb_prepare(d, f);

        /* we now want to start the rexmit tracking */
        d->flags &= ~DEVFL_TKILL;
        d->timer.data = (ulong) d;
        d->timer.function = rexmit_timer;
        d->timer.expires = jiffies + TIMERTICK;
        add_timer(&d->timer);

        return skb;
}
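
/*
 * Besides building the IDENTIFY DEVICE frame, aoecmd_ata_id() arms the
 * per-device retransmit timer and resets rttavg to MAXTIMER, so the first
 * round-trip samples pull the estimate down from a deliberately pessimistic
 * starting value.
 */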

void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
        struct aoedev *d;
        struct aoe_hdr *h;
        struct aoe_cfghdr *ch;
        ulong flags, sysminor, aoemajor;
        u16 bufcnt;
        struct sk_buff *sl;
        enum { MAXFRAMES = 8 };

        h = (struct aoe_hdr *) skb->mac.raw;
        ch = (struct aoe_cfghdr *) (h+1);

        /*
         * Enough people have their dip switches set backwards to
         * warrant a loud message for this special case.
         */
        aoemajor = be16_to_cpu(h->major);
        if (aoemajor == 0xfff) {
                printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
                        "address is all ones. Check shelf dip switches\n");
                return;
        }

        sysminor = SYSMINOR(aoemajor, h->minor);
        if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
                printk(KERN_INFO
                        "aoe: e%ld.%d: minor number too large\n",
                        aoemajor, (int) h->minor);
                return;
        }

        bufcnt = be16_to_cpu(ch->bufcnt);
        if (bufcnt > MAXFRAMES)	/* keep it reasonable */
                bufcnt = MAXFRAMES;

        d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
        if (d == NULL) {
                printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
                return;
        }

        spin_lock_irqsave(&d->lock, flags);

        if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
                spin_unlock_irqrestore(&d->lock, flags);
                return;
        }

        d->fw_ver = be16_to_cpu(ch->fwver);

        /* we get here only if the device is new */
        sl = aoecmd_ata_id(d);

        spin_unlock_irqrestore(&d->lock, flags);

        aoenet_xmit(sl);
}