/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
#include <linux/slab.h>

#include "blk.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
void get_io_context(struct io_context *ioc)
{
        BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
        atomic_long_inc(&ioc->refcount);
}
EXPORT_SYMBOL(get_io_context);
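
/*
 * Example (illustrative only): for %current, the task itself pins its
 * io_context, so a caller can take an extra reference directly instead
 * of going through get_task_io_context() (see the kernel-doc of
 * get_task_io_context() below):
 *
 *        if (current->io_context)
 *                get_io_context(current->io_context);
 */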

/*
 * Releasing ioc may nest into another put_io_context() leading to nested
 * fast path release. As the ioc's can't be the same, this is okay but
 * makes lockdep whine. Keep track of nesting and use it as subclass.
 */
#ifdef CONFIG_LOCKDEP
#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
#else
#define ioc_release_depth(q)		0
#define ioc_release_depth_inc(q)	do { } while (0)
#define ioc_release_depth_dec(q)	do { } while (0)
#endif
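
/*
 * Example (illustrative): the tracked depth is consumed as the lockdep
 * subclass when put_io_context() re-acquires ioc->lock, as in
 *
 *        spin_lock_irqsave_nested(&ioc->lock, flags,
 *                                 ioc_release_depth(locked_q));
 */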

/*
 * Slow path for ioc release in put_io_context(). Performs double-lock
 * dancing to unlink all cic's and then frees ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
        struct io_context *ioc = container_of(work, struct io_context,
                                              release_work);
        struct request_queue *last_q = NULL;

        spin_lock_irq(&ioc->lock);

        while (!hlist_empty(&ioc->cic_list)) {
                struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
                                                         struct cfq_io_context,
                                                         cic_list);
                struct request_queue *this_q = cic->q;

                if (this_q != last_q) {
                        /*
                         * Need to switch to @this_q. Once we release
                         * @ioc->lock, it can go away along with @cic.
                         * Hold on to it.
                         */
                        __blk_get_queue(this_q);

                        /*
                         * blk_put_queue() might sleep thanks to kobject
                         * idiocy. Always release both locks, put and
                         * restart.
                         */
                        if (last_q) {
                                spin_unlock(last_q->queue_lock);
                                spin_unlock_irq(&ioc->lock);
                                blk_put_queue(last_q);
                        } else {
                                spin_unlock_irq(&ioc->lock);
                        }

                        last_q = this_q;
                        spin_lock_irq(this_q->queue_lock);
                        spin_lock(&ioc->lock);
                        continue;
                }
                ioc_release_depth_inc(this_q);
                cic->exit(cic);
                cic->release(cic);
                ioc_release_depth_dec(this_q);
        }

        if (last_q) {
                spin_unlock(last_q->queue_lock);
                spin_unlock_irq(&ioc->lock);
                blk_put_queue(last_q);
        } else {
                spin_unlock_irq(&ioc->lock);
        }

        kmem_cache_free(iocontext_cachep, ioc);
}

/**
 * put_io_context - put a reference of io_context
 * @ioc: io_context to put
 * @locked_q: request_queue the caller is holding queue_lock of (hint)
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero. If the caller is holding queue_lock of a queue, it can indicate
 * that with @locked_q. This is an optimization hint and the caller is
 * allowed to pass in %NULL even when it's holding a queue_lock.
 */
void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
{
        struct request_queue *last_q = locked_q;
        unsigned long flags;

        if (ioc == NULL)
                return;

        BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
        if (locked_q)
                lockdep_assert_held(locked_q->queue_lock);

        if (!atomic_long_dec_and_test(&ioc->refcount))
                return;

        /*
         * Destroy @ioc. This is a bit messy because cic's are chained
         * from both ioc and queue, and ioc->lock nests inside queue_lock.
         * The inner ioc->lock should be held to walk our cic_list and then
         * for each cic the outer matching queue_lock should be grabbed.
         * ie. We need to do reverse-order double lock dancing.
         *
         * Another twist is that we are often called with one of the
         * matching queue_locks held as indicated by @locked_q, which
         * prevents performing double-lock dance for other queues.
         *
         * So, we do it in two stages. The fast path uses the queue_lock
         * the caller is holding and, if other queues need to be accessed,
         * uses trylock to avoid introducing locking dependency. This can
         * handle most cases, especially if @ioc was performing IO on only
         * single device.
         *
         * If trylock doesn't cut it, we defer to @ioc->release_work which
         * can do all the double-locking dancing.
         */
        spin_lock_irqsave_nested(&ioc->lock, flags,
                                 ioc_release_depth(locked_q));

        while (!hlist_empty(&ioc->cic_list)) {
                struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
                                                         struct cfq_io_context,
                                                         cic_list);
                struct request_queue *this_q = cic->q;

                if (this_q != last_q) {
                        if (last_q && last_q != locked_q)
                                spin_unlock(last_q->queue_lock);

                        last_q = NULL;
                        if (!spin_trylock(this_q->queue_lock))
                                break;
                        last_q = this_q;
                        continue;
                }
                ioc_release_depth_inc(this_q);
                cic->exit(cic);
                cic->release(cic);
                ioc_release_depth_dec(this_q);
        }

        if (last_q && last_q != locked_q)
                spin_unlock(last_q->queue_lock);

        spin_unlock_irqrestore(&ioc->lock, flags);

        /* if no cic's left, we're done; otherwise, kick release_work */
        if (hlist_empty(&ioc->cic_list))
                kmem_cache_free(iocontext_cachep, ioc);
        else
                schedule_work(&ioc->release_work);
}
EXPORT_SYMBOL(put_io_context);
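
/*
 * Example (illustrative, hypothetical caller): a caller that is already
 * holding a queue_lock may pass that queue as the hint so the fast path
 * can reuse it; passing %NULL is always safe:
 *
 *        spin_lock_irq(q->queue_lock);
 *        ...
 *        put_io_context(ioc, q);         (queue_lock held, hint given)
 *        spin_unlock_irq(q->queue_lock);
 *
 *        put_io_context(ioc, NULL);      (no queue_lock held)
 */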

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
        struct io_context *ioc;

        /* PF_EXITING prevents new io_context from being attached to @task */
        WARN_ON_ONCE(!(current->flags & PF_EXITING));

        task_lock(task);
        ioc = task->io_context;
        task->io_context = NULL;
        task_unlock(task);

        atomic_dec(&ioc->nr_tasks);
        put_io_context(ioc, NULL);
}

void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
                                int node)
{
        struct io_context *ioc;

        ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
                                    node);
        if (unlikely(!ioc))
                return;

        /* initialize */
        atomic_long_set(&ioc->refcount, 1);
        atomic_set(&ioc->nr_tasks, 1);
        spin_lock_init(&ioc->lock);
        INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
        INIT_HLIST_HEAD(&ioc->cic_list);
        INIT_WORK(&ioc->release_work, ioc_release_fn);

        /* try to install, somebody might already have beaten us to it */
        task_lock(task);
        if (!task->io_context && !(task->flags & PF_EXITING))
                task->io_context = ioc;
        else
                kmem_cache_free(iocontext_cachep, ioc);
        task_unlock(task);
}
EXPORT_SYMBOL(create_io_context_slowpath);
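
/*
 * Example (illustrative): callers normally reach this slowpath through
 * the create_io_context() helper (see its use in get_task_io_context()
 * below) rather than calling it directly, e.g. to make sure %current
 * has an io_context before issuing IO:
 *
 *        create_io_context(current, gfp_mask, q->node);
 *        ioc = current->io_context;
 *
 * where @gfp_mask and @q stand for whatever the IO path has at hand.
 */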

/**
 * get_task_io_context - get io_context of a task
 * @task: task of interest
 * @gfp_flags: allocation flags, used if allocation is necessary
 * @node: allocation node, used if allocation is necessary
 *
 * Return io_context of @task. If it doesn't exist, it is created with
 * @gfp_flags and @node. The returned io_context has its reference count
 * incremented.
 *
 * This function always goes through task_lock() and it's better to use
 * %current->io_context + get_io_context() for %current.
 */
struct io_context *get_task_io_context(struct task_struct *task,
                                       gfp_t gfp_flags, int node)
{
        struct io_context *ioc;

        might_sleep_if(gfp_flags & __GFP_WAIT);

        do {
                task_lock(task);
                ioc = task->io_context;
                if (likely(ioc)) {
                        get_io_context(ioc);
                        task_unlock(task);
                        return ioc;
                }
                task_unlock(task);
        } while (create_io_context(task, gfp_flags, node));

        return NULL;
}
EXPORT_SYMBOL(get_task_io_context);
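
/*
 * Example (illustrative, hypothetical caller): look up another task's
 * io_context and drop the reference when done:
 *
 *        struct io_context *ioc;
 *
 *        ioc = get_task_io_context(p, GFP_KERNEL, NUMA_NO_NODE);
 *        if (ioc) {
 *                ...
 *                put_io_context(ioc, NULL);
 *        }
 */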

/* Set the @which changed bit on every cic attached to @ioc; called with ioc->lock held. */
void ioc_set_changed(struct io_context *ioc, int which)
{
        struct cfq_io_context *cic;
        struct hlist_node *n;

        hlist_for_each_entry(cic, n, &ioc->cic_list, cic_list)
                set_bit(which, &cic->changed);
}

/**
 * ioc_ioprio_changed - notify ioprio change
 * @ioc: io_context of interest
 * @ioprio: new ioprio
 *
 * @ioc's ioprio has changed to @ioprio. Set %CIC_IOPRIO_CHANGED for all
 * cic's. iosched is responsible for checking the bit and applying it on
 * request issue path.
 */
void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
{
        unsigned long flags;

        spin_lock_irqsave(&ioc->lock, flags);
        ioc->ioprio = ioprio;
        ioc_set_changed(ioc, CIC_IOPRIO_CHANGED);
        spin_unlock_irqrestore(&ioc->lock, flags);
}

/**
 * ioc_cgroup_changed - notify cgroup change
 * @ioc: io_context of interest
 *
 * @ioc's cgroup has changed. Set %CIC_CGROUP_CHANGED for all cic's.
 * iosched is responsible for checking the bit and applying it on request
 * issue path.
 */
void ioc_cgroup_changed(struct io_context *ioc)
{
        unsigned long flags;

        spin_lock_irqsave(&ioc->lock, flags);
        ioc_set_changed(ioc, CIC_CGROUP_CHANGED);
        spin_unlock_irqrestore(&ioc->lock, flags);
}
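
/*
 * Example (illustrative): a path that changes a task's io priority or
 * moves it to another cgroup would typically notify the io_context it
 * holds a reference on:
 *
 *        ioc_ioprio_changed(ioc, new_ioprio);
 *        ioc_cgroup_changed(ioc);
 *
 * where @new_ioprio stands for whatever value the caller just applied.
 */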

static int __init blk_ioc_init(void)
{
        iocontext_cachep = kmem_cache_create("blkdev_ioc",
                                             sizeof(struct io_context), 0,
                                             SLAB_PANIC, NULL);
        return 0;
}
subsys_initcall(blk_ioc_init);