blk-cgroup.c
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *                    Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *                    Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include "blk-cgroup.h"
#include "blk.h"

#define MAX_KEY_LEN 100

static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);

static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);

struct blkio_cgroup blkio_root_cgroup = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);

static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];

struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
        return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
                            struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
{
        return container_of(task_subsys_state(tsk, blkio_subsys_id),
                            struct blkio_cgroup, css);
}

struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
{
        if (bio && bio->bi_css)
                return container_of(bio->bi_css, struct blkio_cgroup, css);
        return task_blkio_cgroup(current);
}
EXPORT_SYMBOL_GPL(bio_blkio_cgroup);

/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkio_group *blkg)
{
        int i;

        if (!blkg)
                return;

        for (i = 0; i < BLKIO_NR_POLICIES; i++) {
                struct blkio_policy_type *pol = blkio_policy[i];
                struct blkg_policy_data *pd = blkg->pd[i];

                if (!pd)
                        continue;

                if (pol && pol->ops.blkio_exit_group_fn)
                        pol->ops.blkio_exit_group_fn(blkg);

                kfree(pd);
        }

        kfree(blkg);
}
/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 *
 * Allocate a new blkg associating @blkcg and @q.
 */
static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
                                      struct request_queue *q)
{
        struct blkio_group *blkg;
        int i;

        /* alloc and init base part */
        blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
        if (!blkg)
                return NULL;

        blkg->q = q;
        INIT_LIST_HEAD(&blkg->q_node);
        blkg->blkcg = blkcg;
        blkg->refcnt = 1;
        cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));

        for (i = 0; i < BLKIO_NR_POLICIES; i++) {
                struct blkio_policy_type *pol = blkio_policy[i];
                struct blkg_policy_data *pd;

                if (!pol)
                        continue;

                /* alloc per-policy data and attach it to blkg */
                pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
                                  q->node);
                if (!pd) {
                        blkg_free(blkg);
                        return NULL;
                }

                blkg->pd[i] = pd;
                pd->blkg = blkg;
        }

        /* invoke per-policy init */
        for (i = 0; i < BLKIO_NR_POLICIES; i++) {
                struct blkio_policy_type *pol = blkio_policy[i];

                if (pol)
                        pol->ops.blkio_init_group_fn(blkg);
        }

        return blkg;
}
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
                                       struct request_queue *q,
                                       bool for_root)
        __releases(q->queue_lock) __acquires(q->queue_lock)
{
        struct blkio_group *blkg;

        WARN_ON_ONCE(!rcu_read_lock_held());
        lockdep_assert_held(q->queue_lock);

        /*
         * This could be the first entry point of blkcg implementation and
         * we shouldn't allow anything to go through for a bypassing queue.
         * The following can be removed if blkg lookup is guaranteed to
         * fail on a bypassing queue.
         */
        if (unlikely(blk_queue_bypass(q)) && !for_root)
                return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);

        blkg = blkg_lookup(blkcg, q);
        if (blkg)
                return blkg;

        /* blkg holds a reference to blkcg */
        if (!css_tryget(&blkcg->css))
                return ERR_PTR(-EINVAL);

        /*
         * Allocate and initialize.
         */
        blkg = blkg_alloc(blkcg, q);

        /* did alloc fail? */
        if (unlikely(!blkg)) {
                blkg = ERR_PTR(-ENOMEM);
                goto out;
        }

        /* insert */
        spin_lock(&blkcg->lock);
        hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
        list_add(&blkg->q_node, &q->blkg_list);
        spin_unlock(&blkcg->lock);
out:
        return blkg;
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);
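/*
 * Illustrative sketch (not part of the original file): the locking context a
 * caller needs before using blkg_lookup_create(), matching the asserts at the
 * top of the function - the RCU read lock plus the queue lock.  The variable
 * names are hypothetical.
 *
 *      rcu_read_lock();
 *      spin_lock_irq(q->queue_lock);
 *      blkg = blkg_lookup_create(blkcg, q, false);
 *      if (!IS_ERR(blkg)) {
 *              // use blkg while the queue lock is held
 *      }
 *      spin_unlock_irq(q->queue_lock);
 *      rcu_read_unlock();
 */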
/* called under rcu_read_lock(). */
struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
                                struct request_queue *q)
{
        struct blkio_group *blkg;
        struct hlist_node *n;

        hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
                if (blkg->q == q)
                        return blkg;
        return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup);

static void blkg_destroy(struct blkio_group *blkg)
{
        struct request_queue *q = blkg->q;
        struct blkio_cgroup *blkcg = blkg->blkcg;

        lockdep_assert_held(q->queue_lock);
        lockdep_assert_held(&blkcg->lock);

        /* Something wrong if we are trying to remove same group twice */
        WARN_ON_ONCE(list_empty(&blkg->q_node));
        WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));

        list_del_init(&blkg->q_node);
        hlist_del_init_rcu(&blkg->blkcg_node);

        /*
         * Put the reference taken at the time of creation so that when all
         * queues are gone, group can be destroyed.
         */
        blkg_put(blkg);
}

/*
 * XXX: This updates blkg policy data in-place for root blkg, which is
 * necessary across elevator switch and policy registration as root blkgs
 * aren't shot down.  This broken and racy implementation is temporary.
 * Eventually, blkg shoot down will be replaced by proper in-place update.
 */
void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
{
        struct blkio_policy_type *pol = blkio_policy[plid];
        struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
        struct blkg_policy_data *pd;

        if (!blkg)
                return;

        kfree(blkg->pd[plid]);
        blkg->pd[plid] = NULL;

        if (!pol)
                return;

        pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
        WARN_ON_ONCE(!pd);

        blkg->pd[plid] = pd;
        pd->blkg = blkg;
        pol->ops.blkio_init_group_fn(blkg);
}
EXPORT_SYMBOL_GPL(update_root_blkg_pd);

/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 * @destroy_root: whether to destroy root blkg or not
 *
 * Destroy blkgs associated with @q.  If @destroy_root is %true, all are
 * destroyed; otherwise, root blkg is left alone.
 */
void blkg_destroy_all(struct request_queue *q, bool destroy_root)
{
        struct blkio_group *blkg, *n;

        spin_lock_irq(q->queue_lock);

        list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
                struct blkio_cgroup *blkcg = blkg->blkcg;

                /* skip root? */
                if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
                        continue;

                spin_lock(&blkcg->lock);
                blkg_destroy(blkg);
                spin_unlock(&blkcg->lock);
        }

        spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blkg_destroy_all);
static void blkg_rcu_free(struct rcu_head *rcu_head)
{
        blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
}

void __blkg_release(struct blkio_group *blkg)
{
        /* release the extra blkcg reference this blkg has been holding */
        css_put(&blkg->blkcg->css);

        /*
         * A group is freed in RCU manner.  But having an RCU lock does not
         * mean that one can access all the fields of blkg and assume these
         * are valid.  For example, don't try to follow throtl_data and
         * request queue links.
         *
         * Having a reference to blkg under an RCU read lock allows access
         * only to values local to the group, such as group stats and group
         * rate limits.
         */
        call_rcu(&blkg->rcu_head, blkg_rcu_free);
}
EXPORT_SYMBOL_GPL(__blkg_release);
static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
        struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
        struct blkio_group *blkg;
        struct hlist_node *n;

        spin_lock(&blkio_list_lock);
        spin_lock_irq(&blkcg->lock);

        /*
         * Note that stat reset is racy - it doesn't synchronize against
         * stat updates.  This is a debug feature which shouldn't exist
         * anyway.  If you get hit by a race, retry.
         */
        hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
                struct blkio_policy_type *pol;

                list_for_each_entry(pol, &blkio_list, list)
                        if (pol->ops.blkio_reset_group_stats_fn)
                                pol->ops.blkio_reset_group_stats_fn(blkg);
        }

        spin_unlock_irq(&blkcg->lock);
        spin_unlock(&blkio_list_lock);
        return 0;
}

static const char *blkg_dev_name(struct blkio_group *blkg)
{
        /* some drivers (floppy) instantiate a queue w/o disk registered */
        if (blkg->q->backing_dev_info.dev)
                return dev_name(blkg->q->backing_dev_info.dev);
        return NULL;
}

/**
 * blkcg_print_blkgs - helper for printing per-blkg data
 * @sf: seq_file to print to
 * @blkcg: blkcg of interest
 * @prfill: fill function to print out a blkg
 * @pol: policy in question
 * @data: data to be passed to @prfill
 * @show_total: to print out sum of prfill return values or not
 *
 * This function invokes @prfill on each blkg of @blkcg if pd for the
 * policy specified by @pol exists.  @prfill is invoked with @sf, the
 * policy data and @data.  If @show_total is %true, the sum of the return
 * values from @prfill is printed with "Total" label at the end.
 *
 * This is to be used to construct print functions for
 * cftype->read_seq_string method.
 */
void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg,
                       u64 (*prfill)(struct seq_file *, void *, int),
                       int pol, int data, bool show_total)
{
        struct blkio_group *blkg;
        struct hlist_node *n;
        u64 total = 0;

        spin_lock_irq(&blkcg->lock);
        hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
                if (blkg->pd[pol])
                        total += prfill(sf, blkg->pd[pol]->pdata, data);
        spin_unlock_irq(&blkcg->lock);

        if (show_total)
                seq_printf(sf, "Total %llu\n", (unsigned long long)total);
}
EXPORT_SYMBOL_GPL(blkcg_print_blkgs);
/**
 * __blkg_prfill_u64 - prfill helper for a single u64 value
 * @sf: seq_file to print to
 * @pdata: policy private data of interest
 * @v: value to print
 *
 * Print @v to @sf for the device associated with @pdata.
 */
u64 __blkg_prfill_u64(struct seq_file *sf, void *pdata, u64 v)
{
        const char *dname = blkg_dev_name(pdata_to_blkg(pdata));

        if (!dname)
                return 0;

        seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
        return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);

/**
 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
 * @sf: seq_file to print to
 * @pdata: policy private data of interest
 * @rwstat: rwstat to print
 *
 * Print @rwstat to @sf for the device associated with @pdata.
 */
u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata,
                         const struct blkg_rwstat *rwstat)
{
        static const char *rwstr[] = {
                [BLKG_RWSTAT_READ]  = "Read",
                [BLKG_RWSTAT_WRITE] = "Write",
                [BLKG_RWSTAT_SYNC]  = "Sync",
                [BLKG_RWSTAT_ASYNC] = "Async",
        };
        const char *dname = blkg_dev_name(pdata_to_blkg(pdata));
        u64 v;
        int i;

        if (!dname)
                return 0;

        for (i = 0; i < BLKG_RWSTAT_NR; i++)
                seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
                           (unsigned long long)rwstat->cnt[i]);

        v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
        seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
        return v;
}
/**
 * blkg_prfill_stat - prfill callback for blkg_stat
 * @sf: seq_file to print to
 * @pdata: policy private data of interest
 * @off: offset to the blkg_stat in @pdata
 *
 * prfill callback for printing a blkg_stat.
 */
u64 blkg_prfill_stat(struct seq_file *sf, void *pdata, int off)
{
        return __blkg_prfill_u64(sf, pdata, blkg_stat_read(pdata + off));
}
EXPORT_SYMBOL_GPL(blkg_prfill_stat);

/**
 * blkg_prfill_rwstat - prfill callback for blkg_rwstat
 * @sf: seq_file to print to
 * @pdata: policy private data of interest
 * @off: offset to the blkg_rwstat in @pdata
 *
 * prfill callback for printing a blkg_rwstat.
 */
u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off)
{
        struct blkg_rwstat rwstat = blkg_rwstat_read(pdata + off);

        return __blkg_prfill_rwstat(sf, pdata, &rwstat);
}
EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
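/*
 * Illustrative sketch (not part of the original file): a policy would
 * typically combine blkcg_print_blkgs() with one of the prfill callbacks
 * above to implement a cftype->read_seq_string handler.  The names
 * example_print_time, example_pdata and EXAMPLE_PLID are hypothetical.
 *
 *      static int example_print_time(struct cgroup *cgrp, struct cftype *cft,
 *                                    struct seq_file *sf)
 *      {
 *              struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
 *
 *              blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, EXAMPLE_PLID,
 *                                offsetof(struct example_pdata, time), true);
 *              return 0;
 *      }
 */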
/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result.  @ctx->blkg points to the blkg to be updated and @ctx->v the new
 * value.  This function returns with RCU read locked and must be paired
 * with blkg_conf_finish().
 */
int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input,
                   struct blkg_conf_ctx *ctx)
        __acquires(rcu)
{
        struct gendisk *disk;
        struct blkio_group *blkg;
        unsigned int major, minor;
        unsigned long long v;
        int part, ret;

        if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
                return -EINVAL;

        disk = get_gendisk(MKDEV(major, minor), &part);
        if (!disk || part)
                return -EINVAL;

        rcu_read_lock();
        spin_lock_irq(disk->queue->queue_lock);
        blkg = blkg_lookup_create(blkcg, disk->queue, false);
        spin_unlock_irq(disk->queue->queue_lock);

        if (IS_ERR(blkg)) {
                ret = PTR_ERR(blkg);
                rcu_read_unlock();
                put_disk(disk);
                /*
                 * If queue was bypassing, we should retry.  Do so after a
                 * short msleep().  It isn't strictly necessary but queue
                 * can be bypassing for some time and it's always nice to
                 * avoid busy looping.
                 */
                if (ret == -EBUSY) {
                        msleep(10);
                        ret = restart_syscall();
                }
                return ret;
        }

        ctx->disk = disk;
        ctx->blkg = blkg;
        ctx->v = v;
        return 0;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);
/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 *
 * Finish up after per-blkg config update.  This function must be paired
 * with blkg_conf_prep().
 */
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
        __releases(rcu)
{
        rcu_read_unlock();
        put_disk(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
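/*
 * Illustrative sketch (not part of the original file): a policy's
 * cftype->write_string handler pairs the two helpers above.  The name
 * example_set_limit and the way the value is applied are hypothetical;
 * the "MAJ:MIN VAL" parsing and the RCU/disk lifetime handling come from
 * blkg_conf_prep() and blkg_conf_finish().
 *
 *      static int example_set_limit(struct cgroup *cgrp, struct cftype *cft,
 *                                   const char *buf)
 *      {
 *              struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
 *              struct blkg_conf_ctx ctx;
 *              int ret;
 *
 *              ret = blkg_conf_prep(blkcg, buf, &ctx);
 *              if (ret)
 *                      return ret;
 *
 *              // ctx.blkg and ctx.v are valid here, under rcu_read_lock()
 *              // ... apply ctx.v to the policy data of ctx.blkg ...
 *
 *              blkg_conf_finish(&ctx);
 *              return 0;
 *      }
 */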
struct cftype blkio_files[] = {
        {
                .name = "reset_stats",
                .write_u64 = blkiocg_reset_stats,
        },
        { }     /* terminate */
};

/**
 * blkiocg_pre_destroy - cgroup pre_destroy callback
 * @cgroup: cgroup of interest
 *
 * This function is called when @cgroup is about to go away and responsible
 * for shooting down all blkgs associated with @cgroup.  blkgs should be
 * removed while holding both q and blkcg locks.  As blkcg lock is nested
 * inside q lock, this function performs reverse double lock dancing.
 *
 * This is the blkcg counterpart of ioc_release_fn().
 */
static int blkiocg_pre_destroy(struct cgroup *cgroup)
{
        struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

        spin_lock_irq(&blkcg->lock);

        while (!hlist_empty(&blkcg->blkg_list)) {
                struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
                                        struct blkio_group, blkcg_node);
                struct request_queue *q = blkg->q;

                if (spin_trylock(q->queue_lock)) {
                        blkg_destroy(blkg);
                        spin_unlock(q->queue_lock);
                } else {
                        spin_unlock_irq(&blkcg->lock);
                        cpu_relax();
                        spin_lock_irq(&blkcg->lock);
                }
        }

        spin_unlock_irq(&blkcg->lock);
        return 0;
}

static void blkiocg_destroy(struct cgroup *cgroup)
{
        struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

        if (blkcg != &blkio_root_cgroup)
                kfree(blkcg);
}

static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup)
{
        static atomic64_t id_seq = ATOMIC64_INIT(0);
        struct blkio_cgroup *blkcg;
        struct cgroup *parent = cgroup->parent;

        if (!parent) {
                blkcg = &blkio_root_cgroup;
                goto done;
        }

        blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
        if (!blkcg)
                return ERR_PTR(-ENOMEM);

        blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
        blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
done:
        spin_lock_init(&blkcg->lock);
        INIT_HLIST_HEAD(&blkcg->blkg_list);

        return &blkcg->css;
}

/**
 * blkcg_init_queue - initialize blkcg part of request queue
 * @q: request_queue to initialize
 *
 * Called from blk_alloc_queue_node().  Responsible for initializing blkcg
 * part of new request_queue @q.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int blkcg_init_queue(struct request_queue *q)
{
        int ret;

        might_sleep();

        ret = blk_throtl_init(q);
        if (ret)
                return ret;

        mutex_lock(&all_q_mutex);
        INIT_LIST_HEAD(&q->all_q_node);
        list_add_tail(&q->all_q_node, &all_q_list);
        mutex_unlock(&all_q_mutex);

        return 0;
}

/**
 * blkcg_drain_queue - drain blkcg part of request_queue
 * @q: request_queue to drain
 *
 * Called from blk_drain_queue().  Responsible for draining blkcg part.
 */
void blkcg_drain_queue(struct request_queue *q)
{
        lockdep_assert_held(q->queue_lock);

        blk_throtl_drain(q);
}

/**
 * blkcg_exit_queue - exit and release blkcg part of request_queue
 * @q: request_queue being released
 *
 * Called from blk_release_queue().  Responsible for exiting blkcg part.
 */
void blkcg_exit_queue(struct request_queue *q)
{
        mutex_lock(&all_q_mutex);
        list_del_init(&q->all_q_node);
        mutex_unlock(&all_q_mutex);

        blkg_destroy_all(q, true);

        blk_throtl_exit(q);
}
/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures.  For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
        struct task_struct *task;
        struct io_context *ioc;
        int ret = 0;

        /* task_lock() is needed to avoid races with exit_io_context() */
        cgroup_taskset_for_each(task, cgrp, tset) {
                task_lock(task);
                ioc = task->io_context;
                if (ioc && atomic_read(&ioc->nr_tasks) > 1)
                        ret = -EINVAL;
                task_unlock(task);
                if (ret)
                        break;
        }
        return ret;
}
static void blkcg_bypass_start(void)
        __acquires(&all_q_mutex)
{
        struct request_queue *q;

        mutex_lock(&all_q_mutex);

        list_for_each_entry(q, &all_q_list, all_q_node) {
                blk_queue_bypass_start(q);
                blkg_destroy_all(q, false);
        }
}

static void blkcg_bypass_end(void)
        __releases(&all_q_mutex)
{
        struct request_queue *q;

        list_for_each_entry(q, &all_q_list, all_q_node)
                blk_queue_bypass_end(q);

        mutex_unlock(&all_q_mutex);
}

struct cgroup_subsys blkio_subsys = {
        .name = "blkio",
        .create = blkiocg_create,
        .can_attach = blkiocg_can_attach,
        .pre_destroy = blkiocg_pre_destroy,
        .destroy = blkiocg_destroy,
        .subsys_id = blkio_subsys_id,
        .base_cftypes = blkio_files,
        .module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);

void blkio_policy_register(struct blkio_policy_type *blkiop)
{
        struct request_queue *q;

        blkcg_bypass_start();
        spin_lock(&blkio_list_lock);

        BUG_ON(blkio_policy[blkiop->plid]);
        blkio_policy[blkiop->plid] = blkiop;
        list_add_tail(&blkiop->list, &blkio_list);

        spin_unlock(&blkio_list_lock);
        list_for_each_entry(q, &all_q_list, all_q_node)
                update_root_blkg_pd(q, blkiop->plid);
        blkcg_bypass_end();

        if (blkiop->cftypes)
                WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes));
}
EXPORT_SYMBOL_GPL(blkio_policy_register);

void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
        struct request_queue *q;

        if (blkiop->cftypes)
                cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes);

        blkcg_bypass_start();
        spin_lock(&blkio_list_lock);

        BUG_ON(blkio_policy[blkiop->plid] != blkiop);
        blkio_policy[blkiop->plid] = NULL;
        list_del_init(&blkiop->list);

        spin_unlock(&blkio_list_lock);
        list_for_each_entry(q, &all_q_list, all_q_node)
                update_root_blkg_pd(q, blkiop->plid);
        blkcg_bypass_end();
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);
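/*
 * Illustrative sketch (not part of the original file): the rough shape of a
 * policy that registers itself through the helpers above.  Only fields that
 * this file actually dereferences are shown (ops.blkio_init_group_fn and
 * friends, plid, pdata_size, cftypes); the example_* names and EXAMPLE_PLID
 * are hypothetical, and a real policy (cfq-iosched, blk-throttle) fills in
 * considerably more.
 *
 *      static struct blkio_policy_type blkio_policy_example = {
 *              .ops = {
 *                      .blkio_init_group_fn        = example_init_group,
 *                      .blkio_exit_group_fn        = example_exit_group,
 *                      .blkio_reset_group_stats_fn = example_reset_stats,
 *              },
 *              .plid           = EXAMPLE_PLID,
 *              .pdata_size     = sizeof(struct example_pdata),
 *              .cftypes        = example_files,
 *      };
 *
 *      // module init/exit would then call:
 *      //      blkio_policy_register(&blkio_policy_example);
 *      //      blkio_policy_unregister(&blkio_policy_example);
 */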