/* Copyright (c) 2012 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */

#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include "aoe.h"

static void dummy_timer(ulong);
static void aoedev_freedev(struct aoedev *);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");

static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)

static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);
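
/* Allocate the lowest free whole-disk minor.  With MINORBITS at 20 and
 * AOE_PARTITIONS at 16 (the usual values), N_DEVS comes to 65536 devices.
 */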
static int
minor_get_dyn(ulong *sysminor)
{
        ulong flags;
        ulong n;
        int error = 0;

        spin_lock_irqsave(&used_minors_lock, flags);
        n = find_first_zero_bit(used_minors, N_DEVS);
        if (n < N_DEVS)
                set_bit(n, used_minors);
        else
                error = -1;
        spin_unlock_irqrestore(&used_minors_lock, flags);

        *sysminor = n * AOE_PARTITIONS;
        return error;
}
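
/* Legacy static mapping: AoE address e{major}.{minor} maps to slot
 * major * NPERSHELF + minor, so e1.2 lands in slot 18.  Only 16 slots
 * per shelf are addressable this way.
 */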
static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
        ulong flags;
        ulong n;
        int error = 0;
        enum {
                /* for backwards compatibility when !aoe_dyndevs,
                 * a static number of supported slots per shelf */
                NPERSHELF = 16,
        };

        n = aoemaj * NPERSHELF + aoemin;
        if (aoemin >= NPERSHELF || n >= N_DEVS) {
                pr_err("aoe: %s with e%ld.%d\n",
                        "cannot use static minor device numbers",
                        aoemaj, aoemin);
                error = -1;
        } else {
                spin_lock_irqsave(&used_minors_lock, flags);
                if (test_bit(n, used_minors)) {
                        pr_err("aoe: %s %lu\n",
                                "existing device already has static minor number",
                                n);
                        error = -1;
                } else
                        set_bit(n, used_minors);
                spin_unlock_irqrestore(&used_minors_lock, flags);
        }

        /* convert the slot number to a whole-disk base minor, as
         * minor_get_dyn does; minor_free relies on this scaling */
        *sysminor = n * AOE_PARTITIONS;
        return error;
}

static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
        if (aoe_dyndevs)
                return minor_get_dyn(sysminor);
        else
                return minor_get_static(sysminor, aoemaj, aoemin);
}
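
/* Release a minor obtained from minor_get; the bitmap index is the
 * whole-disk base minor divided by AOE_PARTITIONS.
 */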
static void
minor_free(ulong minor)
{
        ulong flags;

        minor /= AOE_PARTITIONS;
        BUG_ON(minor >= N_DEVS);

        spin_lock_irqsave(&used_minors_lock, flags);
        BUG_ON(!test_bit(minor, used_minors));
        clear_bit(minor, used_minors);
        spin_unlock_irqrestore(&used_minors_lock, flags);
}

/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and are responsible for
 * performing an aoedev_put.  With the addition of async kthread
 * processing I'm no longer confident that we can guarantee
 * consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
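
/* An illustrative caller pattern (not taken from a specific call site):
 *
 *	d = aoedev_by_aoeaddr(maj, min, 0);
 *	if (d == NULL)
 *		return;
 *	... use d ...
 *	aoedev_put(d);
 */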
void
aoedev_put(struct aoedev *d)
{
        ulong flags;

        spin_lock_irqsave(&devlist_lock, flags);
        d->ref--;
        spin_unlock_irqrestore(&devlist_lock, flags);
}
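
/* Placeholder timer handler: re-arms itself once per second so the
 * device timer stays pending until the device is being killed or the
 * handler is replaced by the driver's retransmit timer.
 */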
static void
dummy_timer(ulong vp)
{
        struct aoedev *d;

        d = (struct aoedev *)vp;
        if (d->flags & DEVFL_TKILL)
                return;
        d->timer.expires = jiffies + HZ;
        add_timer(&d->timer);
}
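
/* Fail the in-process buffer and every bio still queued behind it.
 * rq->special carries the count of outstanding bios for the request;
 * once it drops to zero, the request itself is ended.
 */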
static void
aoe_failip(struct aoedev *d)
{
        struct request *rq;
        struct bio *bio;
        unsigned long n;

        aoe_failbuf(d, d->ip.buf);

        rq = d->ip.rq;
        if (rq == NULL)
                return;
        while ((bio = d->ip.nxbio)) {
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
                d->ip.nxbio = bio->bi_next;
                n = (unsigned long) rq->special;
                rq->special = (void *) --n;
        }
        if ((unsigned long) rq->special == 0)
                aoe_end_request(d, rq, 0);
}
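
/* Take the device down: fail all active frames, the in-process request,
 * and everything still queued, then zero the advertised capacity.
 */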
void
aoedev_downdev(struct aoedev *d)
{
        struct aoetgt *t, **tt, **te;
        struct frame *f;
        struct list_head *head, *pos, *nx;
        struct request *rq;
        int i;

        d->flags &= ~DEVFL_UP;

        /* clean out active buffers */
        for (i = 0; i < NFACTIVE; i++) {
                head = &d->factive[i];
                list_for_each_safe(pos, nx, head) {
                        f = list_entry(pos, struct frame, head);
                        list_del(pos);
                        if (f->buf) {
                                f->buf->nframesout--;
                                aoe_failbuf(d, f->buf);
                        }
                        aoe_freetframe(f);
                }
        }

        /* reset window dressings */
        tt = d->targets;
        te = tt + NTARGETS;
        for (; tt < te && (t = *tt); tt++) {
                t->maxout = t->nframes;
                t->nout = 0;
        }

        /* clean out the in-process request (if any) */
        aoe_failip(d);
        d->htgt = NULL;

        /* fast fail all pending I/O */
        if (d->blkq) {
                while ((rq = blk_peek_request(d->blkq))) {
                        blk_start_request(rq);
                        aoe_end_request(d, rq, 1);
                }
        }

        if (d->gd)
                set_capacity(d->gd, 0);
}
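
/* Free the device and everything it owns.  This sleeps
 * (cancel_work_sync, del_gendisk), so callers must not hold spinlocks.
 */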
static void
aoedev_freedev(struct aoedev *d)
{
        struct aoetgt **t, **e;

        cancel_work_sync(&d->work);
        if (d->gd) {
                aoedisk_rm_sysfs(d);
                del_gendisk(d->gd);
                put_disk(d->gd);
                blk_cleanup_queue(d->blkq);
        }
        t = d->targets;
        e = t + NTARGETS;
        for (; t < e && *t; t++)
                freetgt(d, *t);
        if (d->bufpool)
                mempool_destroy(d->bufpool);
        skbpoolfree(d);
        minor_free(d->sysminor);
        kfree(d);
}
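
/* Invoked when a user writes to the driver's "flush" special file:
 * discard devices that are down and idle, or, when "all" is written,
 * any device that is not open, referenced, or mid-allocation.
 */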
int
aoedev_flush(const char __user *str, size_t cnt)
{
        ulong flags;
        struct aoedev *d, **dd;
        struct aoedev *rmd = NULL;
        char buf[16];
        int all = 0;

        if (cnt >= 3) {
                if (cnt > sizeof buf)
                        cnt = sizeof buf;
                if (copy_from_user(buf, str, cnt))
                        return -EFAULT;
                all = !strncmp(buf, "all", 3);
        }

        spin_lock_irqsave(&devlist_lock, flags);
        dd = &devlist;
        while ((d = *dd)) {
                spin_lock(&d->lock);
                if ((!all && (d->flags & DEVFL_UP))
                || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
                || d->nopen
                || d->ref) {
                        spin_unlock(&d->lock);
                        dd = &d->next;
                        continue;
                }
                *dd = d->next;
                aoedev_downdev(d);
                d->flags |= DEVFL_TKILL;
                spin_unlock(&d->lock);
                d->next = rmd;
                rmd = d;
        }
        spin_unlock_irqrestore(&devlist_lock, flags);
        while ((d = rmd)) {
                rmd = d->next;
                del_timer_sync(&d->timer);
                aoedev_freedev(d);      /* must be able to sleep */
        }
        return 0;
}

/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */
static void
skbfree(struct sk_buff *skb)
{
        enum { Sms = 250, Tms = 30 * 1000};
        int i = Tms / Sms;

        if (skb == NULL)
                return;
        while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
                msleep(Sms);
        if (i < 0) {
                printk(KERN_ERR
                        "aoe: %s holds ref: %s\n",
                        skb->dev ? skb->dev->name : "netif",
                        "cannot free skb -- memory leaked.");
                return;
        }
        skb->truesize -= skb->data_len;
        skb_shinfo(skb)->nr_frags = skb->data_len = 0;
        skb_trim(skb, 0);
        dev_kfree_skb(skb);
}

static void
skbpoolfree(struct aoedev *d)
{
        struct sk_buff *skb, *tmp;

        skb_queue_walk_safe(&d->skbpool, skb, tmp)
                skbfree(skb);

        __skb_queue_head_init(&d->skbpool);
}

/* Find the device with the given AoE address, or allocate it when
 * do_alloc is set.  A reference is taken on the returned device; the
 * caller must release it with aoedev_put.
 */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
        struct aoedev *d;
        int i;
        ulong flags;
        ulong sysminor;

        spin_lock_irqsave(&devlist_lock, flags);

        for (d=devlist; d; d=d->next)
                if (d->aoemajor == maj && d->aoeminor == min) {
                        d->ref++;
                        break;
                }
        if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
                goto out;
        d = kcalloc(1, sizeof *d, GFP_ATOMIC);
        if (!d)
                goto out;
        INIT_WORK(&d->work, aoecmd_sleepwork);
        spin_lock_init(&d->lock);
        skb_queue_head_init(&d->skbpool);
        init_timer(&d->timer);
        d->timer.data = (ulong) d;
        d->timer.function = dummy_timer;
        d->timer.expires = jiffies + HZ;
        add_timer(&d->timer);
        d->bufpool = NULL;      /* defer to aoeblk_gdalloc */
        d->tgt = d->targets;
        d->ref = 1;
        for (i = 0; i < NFACTIVE; i++)
                INIT_LIST_HEAD(&d->factive[i]);
        d->sysminor = sysminor;
        d->aoemajor = maj;
        d->aoeminor = min;
        d->mintimer = MINTIMER;
        d->next = devlist;
        devlist = d;
 out:
        spin_unlock_irqrestore(&devlist_lock, flags);
        return d;
}
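
/* Release a target: drop the references held on its network interfaces
 * and free the frames on its free list, then the target itself.
 */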
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
        struct frame *f;
        struct list_head *pos, *nx, *head;
        struct aoeif *ifp;

        for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
                if (!ifp->nd)
                        break;
                dev_put(ifp->nd);
        }

        head = &t->ffree;
        list_for_each_safe(pos, nx, head) {
                list_del(pos);
                f = list_entry(pos, struct frame, head);
                skbfree(f->skb);
                kfree(f);
        }
        kfree(t);
}

void
aoedev_exit(void)
{
        struct aoedev *d;
        ulong flags;

        aoe_flush_iocq();
        while ((d = devlist)) {
                devlist = d->next;

                spin_lock_irqsave(&d->lock, flags);
                aoedev_downdev(d);
                d->flags |= DEVFL_TKILL;
                spin_unlock_irqrestore(&d->lock, flags);

                del_timer_sync(&d->timer);
                aoedev_freedev(d);
        }
}

int __init
aoedev_init(void)
{
        return 0;
}