blk-ioc.c

/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
#include <linux/slab.h>

#include "blk.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
void get_io_context(struct io_context *ioc)
{
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	atomic_long_inc(&ioc->refcount);
}
EXPORT_SYMBOL(get_io_context);
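
/*
 * Usage sketch (illustrative only, hypothetical caller): for %current the
 * io_context pointer can be read directly and pinned with get_io_context(),
 * as suggested in the get_task_io_context() comment below; the reference is
 * dropped later with put_io_context().
 *
 *	struct io_context *ioc = current->io_context;
 *
 *	if (ioc) {
 *		get_io_context(ioc);
 *		... use ioc ...
 *		put_io_context(ioc, NULL);
 *	}
 */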

/*
 * Releasing ioc may nest into another put_io_context() leading to nested
 * fast path release. As the ioc's can't be the same, this is okay but
 * makes lockdep whine. Keep track of nesting and use it as subclass.
 */
#ifdef CONFIG_LOCKDEP
#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
#else
#define ioc_release_depth(q)		0
#define ioc_release_depth_inc(q)	do { } while (0)
#define ioc_release_depth_dec(q)	do { } while (0)
#endif
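
/*
 * Illustrative call chain (hypothetical, for explanation only): while
 * put_io_context(ioc_a) is releasing icq's with ioc_a->lock held, an
 * icq->release() callback may drop the last reference on a different
 * io_context and recurse:
 *
 *	put_io_context(ioc_a)
 *	  spin_lock_irqsave_nested(&ioc_a->lock, flags, 0)
 *	    icq->release(icq)
 *	      put_io_context(ioc_b)
 *	        spin_lock_irqsave_nested(&ioc_b->lock, flags, 1)
 *
 * Both locks belong to the same lock class, so without the per-queue
 * release depth as the lockdep subclass this would be reported as a
 * self-deadlock even though ioc_a != ioc_b.
 */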

/*
 * Slow path for ioc release in put_io_context(). Performs double-lock
 * dancing to unlink all icq's and then frees ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
	struct io_context *ioc = container_of(work, struct io_context,
					      release_work);
	struct request_queue *last_q = NULL;

	spin_lock_irq(&ioc->lock);

	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);
		struct request_queue *this_q = icq->q;

		if (this_q != last_q) {
			/*
			 * Need to switch to @this_q. Once we release
			 * @ioc->lock, it can go away along with @icq.
			 * Hold on to it.
			 */
			__blk_get_queue(this_q);

			/*
			 * blk_put_queue() might sleep thanks to kobject
			 * idiocy. Always release both locks, put and
			 * restart.
			 */
			if (last_q) {
				spin_unlock(last_q->queue_lock);
				spin_unlock_irq(&ioc->lock);
				blk_put_queue(last_q);
			} else {
				spin_unlock_irq(&ioc->lock);
			}

			last_q = this_q;
			spin_lock_irq(this_q->queue_lock);
			spin_lock(&ioc->lock);
			continue;
		}
		ioc_release_depth_inc(this_q);
		icq->exit(icq);
		icq->release(icq);
		ioc_release_depth_dec(this_q);
	}

	if (last_q) {
		spin_unlock(last_q->queue_lock);
		spin_unlock_irq(&ioc->lock);
		blk_put_queue(last_q);
	} else {
		spin_unlock_irq(&ioc->lock);
	}

	kmem_cache_free(iocontext_cachep, ioc);
}

/**
 * put_io_context - put a reference of io_context
 * @ioc: io_context to put
 * @locked_q: request_queue the caller is holding queue_lock of (hint)
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero. If the caller is holding queue_lock of a queue, it can indicate
 * that with @locked_q. This is an optimization hint and the caller is
 * allowed to pass in %NULL even when it's holding a queue_lock.
 */
void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
{
	struct request_queue *last_q = locked_q;
	unsigned long flags;

	if (ioc == NULL)
		return;

	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	if (locked_q)
		lockdep_assert_held(locked_q->queue_lock);

	if (!atomic_long_dec_and_test(&ioc->refcount))
		return;

	/*
	 * Destroy @ioc. This is a bit messy because icq's are chained
	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
	 * The inner ioc->lock should be held to walk our icq_list and then
	 * for each icq the outer matching queue_lock should be grabbed.
	 * ie. We need to do reverse-order double lock dancing.
	 *
	 * Another twist is that we are often called with one of the
	 * matching queue_locks held as indicated by @locked_q, which
	 * prevents performing double-lock dance for other queues.
	 *
	 * So, we do it in two stages. The fast path uses the queue_lock
	 * the caller is holding and, if other queues need to be accessed,
	 * uses trylock to avoid introducing locking dependency. This can
	 * handle most cases, especially if @ioc was performing IO on only
	 * a single device.
	 *
	 * If trylock doesn't cut it, we defer to @ioc->release_work which
	 * can do all the double-locking dancing.
	 */
	spin_lock_irqsave_nested(&ioc->lock, flags,
				 ioc_release_depth(locked_q));

	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);
		struct request_queue *this_q = icq->q;

		if (this_q != last_q) {
			if (last_q && last_q != locked_q)
				spin_unlock(last_q->queue_lock);
			last_q = NULL;

			if (!spin_trylock(this_q->queue_lock))
				break;
			last_q = this_q;
			continue;
		}
		ioc_release_depth_inc(this_q);
		icq->exit(icq);
		icq->release(icq);
		ioc_release_depth_dec(this_q);
	}

	if (last_q && last_q != locked_q)
		spin_unlock(last_q->queue_lock);

	spin_unlock_irqrestore(&ioc->lock, flags);

	/* if no icq is left, we're done; otherwise, kick release_work */
	if (hlist_empty(&ioc->icq_list))
		kmem_cache_free(iocontext_cachep, ioc);
	else
		schedule_work(&ioc->release_work);
}
EXPORT_SYMBOL(put_io_context);
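
/*
 * Usage sketch (illustrative only, hypothetical caller): a caller that
 * already holds a queue's queue_lock can pass that queue as the hint so
 * the fast path can unlink icq's belonging to it without trylocking.
 *
 *	spin_lock_irq(q->queue_lock);
 *	...
 *	put_io_context(ioc, q);		... queue_lock held, pass @q ...
 *	spin_unlock_irq(q->queue_lock);
 *
 * Callers holding no queue_lock (or not wanting to bother) simply pass
 * %NULL, e.g. put_io_context(ioc, NULL).
 */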

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
	struct io_context *ioc;

	/* PF_EXITING prevents new io_context from being attached to @task */
	WARN_ON_ONCE(!(current->flags & PF_EXITING));

	task_lock(task);
	ioc = task->io_context;
	task->io_context = NULL;
	task_unlock(task);

	atomic_dec(&ioc->nr_tasks);
	put_io_context(ioc, NULL);
}

void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
				int node)
{
	struct io_context *ioc;

	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
				    node);
	if (unlikely(!ioc))
		return;

	/* initialize */
	atomic_long_set(&ioc->refcount, 1);
	atomic_set(&ioc->nr_tasks, 1);
	spin_lock_init(&ioc->lock);
	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
	INIT_HLIST_HEAD(&ioc->icq_list);
	INIT_WORK(&ioc->release_work, ioc_release_fn);

	/* try to install, somebody might already have beaten us to it */
	task_lock(task);
	if (!task->io_context && !(task->flags & PF_EXITING))
		task->io_context = ioc;
	else
		kmem_cache_free(iocontext_cachep, ioc);
	task_unlock(task);
}
EXPORT_SYMBOL(create_io_context_slowpath);
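
/*
 * For reference: the create_io_context() helper used below lives in blk.h,
 * not in this file.  A rough sketch of such a wrapper (an assumption, not
 * necessarily the exact blk.h definition) would check for an existing
 * context before taking the slow path:
 *
 *	static inline struct io_context *create_io_context(
 *			struct task_struct *task, gfp_t gfp_flags, int node)
 *	{
 *		if (unlikely(!task->io_context))
 *			create_io_context_slowpath(task, gfp_flags, node);
 *		return task->io_context;
 *	}
 */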

/**
 * get_task_io_context - get io_context of a task
 * @task: task of interest
 * @gfp_flags: allocation flags, used if allocation is necessary
 * @node: allocation node, used if allocation is necessary
 *
 * Return io_context of @task. If it doesn't exist, it is created with
 * @gfp_flags and @node. The returned io_context has its reference count
 * incremented.
 *
 * This function always goes through task_lock() and it's better to use
 * %current->io_context + get_io_context() for %current.
 */
struct io_context *get_task_io_context(struct task_struct *task,
				       gfp_t gfp_flags, int node)
{
	struct io_context *ioc;

	might_sleep_if(gfp_flags & __GFP_WAIT);

	do {
		task_lock(task);
		ioc = task->io_context;
		if (likely(ioc)) {
			get_io_context(ioc);
			task_unlock(task);
			return ioc;
		}
		task_unlock(task);
	} while (create_io_context(task, gfp_flags, node));

	return NULL;
}
EXPORT_SYMBOL(get_task_io_context);
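
/*
 * Usage sketch (illustrative only, hypothetical caller): grab another
 * task's io_context, allocating one if needed, and drop the reference
 * when done.
 *
 *	struct io_context *ioc;
 *
 *	ioc = get_task_io_context(task, GFP_KERNEL, NUMA_NO_NODE);
 *	if (ioc) {
 *		... inspect ioc->ioprio, look up icq's, etc. ...
 *		put_io_context(ioc, NULL);
 *	}
 */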

/**
 * ioc_lookup_icq - lookup io_cq from ioc
 * @ioc: the associated io_context
 * @q: the associated request_queue
 *
 * Look up io_cq associated with @ioc - @q pair from @ioc. Must be called
 * with @q->queue_lock held.
 */
struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q)
{
	struct io_cq *icq;

	lockdep_assert_held(q->queue_lock);

	/*
	 * icq's are indexed from @ioc using radix tree and hint pointer,
	 * both of which are protected with RCU. All removals are done
	 * holding both q and ioc locks, and we're holding q lock - if we
	 * find an icq which points to us, it's guaranteed to be valid.
	 */
	rcu_read_lock();
	icq = rcu_dereference(ioc->icq_hint);
	if (icq && icq->q == q)
		goto out;

	icq = radix_tree_lookup(&ioc->icq_tree, q->id);
	if (icq && icq->q == q)
		rcu_assign_pointer(ioc->icq_hint, icq);	/* allowed to race */
	else
		icq = NULL;
out:
	rcu_read_unlock();
	return icq;
}
EXPORT_SYMBOL(ioc_lookup_icq);
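
/*
 * Usage sketch (illustrative only, hypothetical iosched code): with
 * @q->queue_lock held, an I/O scheduler can map the current task to its
 * per-queue icq.
 *
 *	struct io_context *ioc = current->io_context;
 *	struct io_cq *icq = NULL;
 *
 *	if (ioc)
 *		icq = ioc_lookup_icq(ioc, q);
 *	if (icq) {
 *		... use the elevator-specific data associated with icq ...
 *	}
 */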

void ioc_set_changed(struct io_context *ioc, int which)
{
	struct io_cq *icq;
	struct hlist_node *n;

	hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node)
		set_bit(which, &icq->changed);
}

/**
 * ioc_ioprio_changed - notify ioprio change
 * @ioc: io_context of interest
 * @ioprio: new ioprio
 *
 * @ioc's ioprio has changed to @ioprio. Set %ICQ_IOPRIO_CHANGED for all
 * icq's. iosched is responsible for checking the bit and applying it on
 * the request issue path.
 */
void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc->ioprio = ioprio;
	ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}
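
/*
 * Usage sketch (illustrative only, hypothetical iosched code): on the
 * request issue path an I/O scheduler can test-and-clear the changed bits
 * on the icq it looked up and refresh its cached state.
 *
 *	if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &icq->changed))
 *		... re-read icq->ioc->ioprio and update scheduler state ...
 *	if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &icq->changed))
 *		... re-resolve the blkio cgroup for this icq ...
 */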

/**
 * ioc_cgroup_changed - notify cgroup change
 * @ioc: io_context of interest
 *
 * @ioc's cgroup has changed. Set %ICQ_CGROUP_CHANGED for all icq's.
 * iosched is responsible for checking the bit and applying it on the
 * request issue path.
 */
void ioc_cgroup_changed(struct io_context *ioc)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	ioc_set_changed(ioc, ICQ_CGROUP_CHANGED);
	spin_unlock_irqrestore(&ioc->lock, flags);
}

static int __init blk_ioc_init(void)
{
	iocontext_cachep = kmem_cache_create("blkdev_ioc",
					     sizeof(struct io_context), 0,
					     SLAB_PANIC, NULL);
	return 0;
}
subsys_initcall(blk_ioc_init);