aoedev.c 6.7 KB

  1. /* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */
  2. /*
  3. * aoedev.c
  4. * AoE device utility functions; maintains device list.
  5. */
  6. #include <linux/hdreg.h>
  7. #include <linux/blkdev.h>
  8. #include <linux/netdevice.h>
  9. #include <linux/delay.h>
  10. #include <linux/slab.h>
  11. #include "aoe.h"
  12. static void dummy_timer(ulong);
  13. static void aoedev_freedev(struct aoedev *);
  14. static void freetgt(struct aoedev *d, struct aoetgt *t);
  15. static void skbpoolfree(struct aoedev *d);
  16. static struct aoedev *devlist;
  17. static DEFINE_SPINLOCK(devlist_lock);
  18. /*
  19. * Users who grab a pointer to the device with aoedev_by_aoeaddr or
  20. * aoedev_by_sysminor_m automatically get a reference count and must
* be responsible for performing an aoedev_put. With the addition of
  22. * async kthread processing I'm no longer confident that we can
  23. * guarantee consistency in the face of device flushes.
  24. *
  25. * For the time being, we only bother to add extra references for
  26. * frames sitting on the iocq. When the kthreads finish processing
  27. * these frames, they will aoedev_put the device.
  28. */
  29. struct aoedev *
  30. aoedev_by_aoeaddr(int maj, int min)
  31. {
  32. struct aoedev *d;
  33. ulong flags;
  34. spin_lock_irqsave(&devlist_lock, flags);
  35. for (d=devlist; d; d=d->next)
  36. if (d->aoemajor == maj && d->aoeminor == min) {
  37. d->ref++;
  38. break;
  39. }
  40. spin_unlock_irqrestore(&devlist_lock, flags);
  41. return d;
  42. }
  43. void
  44. aoedev_put(struct aoedev *d)
  45. {
  46. ulong flags;
  47. spin_lock_irqsave(&devlist_lock, flags);
  48. d->ref--;
  49. spin_unlock_irqrestore(&devlist_lock, flags);
  50. }
  51. static void
  52. dummy_timer(ulong vp)
  53. {
  54. struct aoedev *d;
  55. d = (struct aoedev *)vp;
  56. if (d->flags & DEVFL_TKILL)
  57. return;
  58. d->timer.expires = jiffies + HZ;
  59. add_timer(&d->timer);
  60. }
/* aoe_failip: fail the in-process request (d->ip), if any.
 * rq->special carries the count of bios not yet completed for this
 * request; each unissued bio is marked failed and decremented from
 * that count, and the request is error-completed once the count hits
 * zero.  NOTE(review): assumes caller holds the device lock, as the
 * other d->ip users here appear to -- confirm against callers.
 */
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct bio *bio;
	unsigned long n;

	aoe_failbuf(d, d->ip.buf);

	rq = d->ip.rq;
	if (rq == NULL)
		return;		/* no request in process */
	/* Fail every bio not yet issued, dropping each from the
	 * outstanding-bio count stashed in rq->special.
	 */
	while ((bio = d->ip.nxbio)) {
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
		d->ip.nxbio = bio->bi_next;
		n = (unsigned long) rq->special;
		rq->special = (void *) --n;
	}
	/* all bios accounted for: complete the request with an error */
	if ((unsigned long) rq->special == 0)
		aoe_end_request(d, rq, 0);
}
/* aoedev_downdev: mark the device down and fail all outstanding I/O.
 * Active frames are failed and returned to their targets, each
 * target's congestion window is reset, the in-process request is
 * failed, and everything still queued in the block layer is
 * error-completed.  NOTE(review): callers in this file invoke this
 * with d->lock held -- confirm any external callers do the same.
 */
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct frame *f;
	struct list_head *head, *pos, *nx;
	struct request *rq;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head) {
			f = list_entry(pos, struct frame, head);
			list_del(pos);
			if (f->buf) {
				f->buf->nframesout--;
				aoe_failbuf(d, f->buf);
			}
			aoe_freetframe(f);
		}
	}
	/* reset window dressings */
	tt = d->targets;
	te = tt + NTARGETS;
	for (; tt < te && (t = *tt); tt++) {
		t->maxout = t->nframes;	/* reopen the window fully */
		t->nout = 0;		/* nothing outstanding now */
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);
	d->htgt = NULL;

	/* fast fail all pending I/O */
	if (d->blkq) {
		while ((rq = blk_peek_request(d->blkq))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
	}

	/* zero capacity so userland sees the device as gone */
	if (d->gd)
		set_capacity(d->gd, 0);
}
  122. static void
  123. aoedev_freedev(struct aoedev *d)
  124. {
  125. struct aoetgt **t, **e;
  126. cancel_work_sync(&d->work);
  127. if (d->gd) {
  128. aoedisk_rm_sysfs(d);
  129. del_gendisk(d->gd);
  130. put_disk(d->gd);
  131. blk_cleanup_queue(d->blkq);
  132. }
  133. t = d->targets;
  134. e = t + NTARGETS;
  135. for (; t < e && *t; t++)
  136. freetgt(d, *t);
  137. if (d->bufpool)
  138. mempool_destroy(d->bufpool);
  139. skbpoolfree(d);
  140. kfree(d);
  141. }
/* aoedev_flush: remove idle devices from the device list and free
 * them.  If the user wrote "all", devices that are still up are also
 * eligible; otherwise only devices that are down, unopened, and
 * unreferenced are flushed.  Unlinking happens under devlist_lock;
 * the freeing is deferred to a second pass because aoedev_freedev
 * may sleep.  Returns 0, or -EFAULT if the user buffer is unreadable.
 */
int
aoedev_flush(const char __user *str, size_t cnt)
{
	ulong flags;
	struct aoedev *d, **dd;
	struct aoedev *rmd = NULL;	/* private list of unlinked devices */
	char buf[16];
	int all = 0;

	if (cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		/* only the first three bytes matter; buf need not be
		 * NUL-terminated for this strncmp
		 */
		all = !strncmp(buf, "all", 3);
	}

	spin_lock_irqsave(&devlist_lock, flags);
	dd = &devlist;
	while ((d = *dd)) {
		spin_lock(&d->lock);
		/* skip busy devices: up (unless "all"), mid gendisk
		 * alloc/resize, open, or still referenced
		 */
		if ((!all && (d->flags & DEVFL_UP))
		|| (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
		|| d->nopen
		|| d->ref) {
			spin_unlock(&d->lock);
			dd = &d->next;
			continue;
		}
		*dd = d->next;	/* unlink from devlist in place */
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		spin_unlock(&d->lock);
		d->next = rmd;
		rmd = d;
	}
	spin_unlock_irqrestore(&devlist_lock, flags);
	/* second pass: free outside the lock */
	while ((d = rmd)) {
		rmd = d->next;
		del_timer_sync(&d->timer);
		aoedev_freedev(d);	/* must be able to sleep */
	}
	return 0;
}
  184. /* This has been confirmed to occur once with Tms=3*1000 due to the
  185. * driver changing link and not processing its transmit ring. The
  186. * problem is hard enough to solve by returning an error that I'm
  187. * still punting on "solving" this.
  188. */
  189. static void
  190. skbfree(struct sk_buff *skb)
  191. {
  192. enum { Sms = 250, Tms = 30 * 1000};
  193. int i = Tms / Sms;
  194. if (skb == NULL)
  195. return;
  196. while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
  197. msleep(Sms);
  198. if (i < 0) {
  199. printk(KERN_ERR
  200. "aoe: %s holds ref: %s\n",
  201. skb->dev ? skb->dev->name : "netif",
  202. "cannot free skb -- memory leaked.");
  203. return;
  204. }
  205. skb->truesize -= skb->data_len;
  206. skb_shinfo(skb)->nr_frags = skb->data_len = 0;
  207. skb_trim(skb, 0);
  208. dev_kfree_skb(skb);
  209. }
  210. static void
  211. skbpoolfree(struct aoedev *d)
  212. {
  213. struct sk_buff *skb, *tmp;
  214. skb_queue_walk_safe(&d->skbpool, skb, tmp)
  215. skbfree(skb);
  216. __skb_queue_head_init(&d->skbpool);
  217. }
  218. /* find it or malloc it */
  219. struct aoedev *
  220. aoedev_by_sysminor_m(ulong sysminor)
  221. {
  222. struct aoedev *d;
  223. int i;
  224. ulong flags;
  225. spin_lock_irqsave(&devlist_lock, flags);
  226. for (d=devlist; d; d=d->next)
  227. if (d->sysminor == sysminor) {
  228. d->ref++;
  229. break;
  230. }
  231. if (d)
  232. goto out;
  233. d = kcalloc(1, sizeof *d, GFP_ATOMIC);
  234. if (!d)
  235. goto out;
  236. INIT_WORK(&d->work, aoecmd_sleepwork);
  237. spin_lock_init(&d->lock);
  238. skb_queue_head_init(&d->skbpool);
  239. init_timer(&d->timer);
  240. d->timer.data = (ulong) d;
  241. d->timer.function = dummy_timer;
  242. d->timer.expires = jiffies + HZ;
  243. add_timer(&d->timer);
  244. d->bufpool = NULL; /* defer to aoeblk_gdalloc */
  245. d->tgt = d->targets;
  246. d->ref = 1;
  247. for (i = 0; i < NFACTIVE; i++)
  248. INIT_LIST_HEAD(&d->factive[i]);
  249. d->sysminor = sysminor;
  250. d->aoemajor = AOEMAJOR(sysminor);
  251. d->aoeminor = AOEMINOR(sysminor);
  252. d->mintimer = MINTIMER;
  253. d->next = devlist;
  254. devlist = d;
  255. out:
  256. spin_unlock_irqrestore(&devlist_lock, flags);
  257. return d;
  258. }
  259. static void
  260. freetgt(struct aoedev *d, struct aoetgt *t)
  261. {
  262. struct frame *f;
  263. struct list_head *pos, *nx, *head;
  264. struct aoeif *ifp;
  265. for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
  266. if (!ifp->nd)
  267. break;
  268. dev_put(ifp->nd);
  269. }
  270. head = &t->ffree;
  271. list_for_each_safe(pos, nx, head) {
  272. list_del(pos);
  273. f = list_entry(pos, struct frame, head);
  274. skbfree(f->skb);
  275. kfree(f);
  276. }
  277. kfree(t);
  278. }
  279. void
  280. aoedev_exit(void)
  281. {
  282. struct aoedev *d;
  283. ulong flags;
  284. aoe_flush_iocq();
  285. while ((d = devlist)) {
  286. devlist = d->next;
  287. spin_lock_irqsave(&d->lock, flags);
  288. aoedev_downdev(d);
  289. d->flags |= DEVFL_TKILL;
  290. spin_unlock_irqrestore(&d->lock, flags);
  291. del_timer_sync(&d->timer);
  292. aoedev_freedev(d);
  293. }
  294. }
/* aoedev_init: module-init hook for this file; no per-module setup is
 * currently required.
 */
int __init
aoedev_init(void)
{
	return 0;
}