/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *                    Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *                    Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include "blk-cgroup.h"
#include "blk.h"

#define MAX_KEY_LEN 100

static DEFINE_MUTEX(blkcg_pol_mutex);
static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);

struct blkio_cgroup blkio_root_cgroup = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);

static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];

struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
        return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
                            struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
{
        return container_of(task_subsys_state(tsk, blkio_subsys_id),
                            struct blkio_cgroup, css);
}

struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
{
        if (bio && bio->bi_css)
                return container_of(bio->bi_css, struct blkio_cgroup, css);
        return task_blkio_cgroup(current);
}
EXPORT_SYMBOL_GPL(bio_blkio_cgroup);

/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkio_group *blkg)
{
        int i;

        if (!blkg)
                return;

        for (i = 0; i < BLKIO_NR_POLICIES; i++) {
                struct blkio_policy_type *pol = blkio_policy[i];
                struct blkg_policy_data *pd = blkg->pd[i];

                if (!pd)
                        continue;

                if (pol && pol->ops.blkio_exit_group_fn)
                        pol->ops.blkio_exit_group_fn(blkg);

                kfree(pd);
        }

        kfree(blkg);
}

/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 *
 * Allocate a new blkg associating @blkcg and @q.
 */
static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
                                      struct request_queue *q)
{
        struct blkio_group *blkg;
        int i;

        /* alloc and init base part */
        blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
        if (!blkg)
                return NULL;

        blkg->q = q;
        INIT_LIST_HEAD(&blkg->q_node);
        blkg->blkcg = blkcg;
        blkg->refcnt = 1;
        cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));

        for (i = 0; i < BLKIO_NR_POLICIES; i++) {
                struct blkio_policy_type *pol = blkio_policy[i];
                struct blkg_policy_data *pd;

                if (!pol)
                        continue;

                /* alloc per-policy data and attach it to blkg */
                pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
                                  q->node);
                if (!pd) {
                        blkg_free(blkg);
                        return NULL;
                }

                blkg->pd[i] = pd;
                pd->blkg = blkg;
        }

        /* invoke per-policy init */
        for (i = 0; i < BLKIO_NR_POLICIES; i++) {
                struct blkio_policy_type *pol = blkio_policy[i];

                if (pol)
                        pol->ops.blkio_init_group_fn(blkg);
        }

        return blkg;
}

struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
                                       struct request_queue *q,
                                       bool for_root)
        __releases(q->queue_lock) __acquires(q->queue_lock)
{
        struct blkio_group *blkg;

        WARN_ON_ONCE(!rcu_read_lock_held());
        lockdep_assert_held(q->queue_lock);

        /*
         * This could be the first entry point of blkcg implementation and
         * we shouldn't allow anything to go through for a bypassing queue.
         * The following can be removed if blkg lookup is guaranteed to
         * fail on a bypassing queue.
         */
        if (unlikely(blk_queue_bypass(q)) && !for_root)
                return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);

        blkg = blkg_lookup(blkcg, q);
        if (blkg)
                return blkg;

        /* blkg holds a reference to blkcg */
        if (!css_tryget(&blkcg->css))
                return ERR_PTR(-EINVAL);

        /* allocate and initialize */
        blkg = blkg_alloc(blkcg, q);
        if (unlikely(!blkg)) {
                blkg = ERR_PTR(-ENOMEM);
                goto out;
        }

        /* insert */
        spin_lock(&blkcg->lock);
        hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
        list_add(&blkg->q_node, &q->blkg_list);
        spin_unlock(&blkcg->lock);
out:
        return blkg;
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);
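
/*
 * A minimal caller sketch for blkg_lookup_create() (illustrative only, not
 * part of this file): callers must hold both the RCU read lock and the
 * queue lock, and should be prepared to retry when a bypassing queue
 * returns -EBUSY. blkg_conf_prep() below follows this same pattern.
 *
 *      rcu_read_lock();
 *      spin_lock_irq(q->queue_lock);
 *      blkg = blkg_lookup_create(blkcg, q, false);
 *      spin_unlock_irq(q->queue_lock);
 *      if (IS_ERR(blkg))
 *              ... handle -EBUSY / -EINVAL / -ENOMEM ...
 *      rcu_read_unlock();
 */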
/* called under rcu_read_lock(). */
struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
                                struct request_queue *q)
{
        struct blkio_group *blkg;
        struct hlist_node *n;

        hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
                if (blkg->q == q)
                        return blkg;
        return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup);

static void blkg_destroy(struct blkio_group *blkg)
{
        struct request_queue *q = blkg->q;
        struct blkio_cgroup *blkcg = blkg->blkcg;

        lockdep_assert_held(q->queue_lock);
        lockdep_assert_held(&blkcg->lock);

        /* Something is wrong if we are trying to remove the same group twice */
        WARN_ON_ONCE(list_empty(&blkg->q_node));
        WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));

        list_del_init(&blkg->q_node);
        hlist_del_init_rcu(&blkg->blkcg_node);

        /*
         * Put the reference taken at the time of creation so that when all
         * queues are gone, the group can be destroyed.
         */
        blkg_put(blkg);
}

/*
 * XXX: This updates blkg policy data in-place for the root blkg, which is
 * necessary across elevator switch and policy registration as root blkgs
 * aren't shot down. This broken and racy implementation is temporary.
 * Eventually, blkg shoot down will be replaced by proper in-place update.
 */
void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
{
        struct blkio_policy_type *pol = blkio_policy[plid];
        struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
        struct blkg_policy_data *pd;

        if (!blkg)
                return;

        kfree(blkg->pd[plid]);
        blkg->pd[plid] = NULL;

        if (!pol)
                return;

        pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
        WARN_ON_ONCE(!pd);
        if (!pd)
                return; /* bail on alloc failure; blkg->pd[plid] stays NULL */

        blkg->pd[plid] = pd;
        pd->blkg = blkg;
        pol->ops.blkio_init_group_fn(blkg);
}
EXPORT_SYMBOL_GPL(update_root_blkg_pd);

/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 * @destroy_root: whether to destroy the root blkg or not
 *
 * Destroy blkgs associated with @q. If @destroy_root is %true, all are
 * destroyed; otherwise, the root blkg is left alone.
 */
void blkg_destroy_all(struct request_queue *q, bool destroy_root)
{
        struct blkio_group *blkg, *n;

        spin_lock_irq(q->queue_lock);

        list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
                struct blkio_cgroup *blkcg = blkg->blkcg;

                /* skip root? */
                if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
                        continue;

                spin_lock(&blkcg->lock);
                blkg_destroy(blkg);
                spin_unlock(&blkcg->lock);
        }

        spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blkg_destroy_all);

static void blkg_rcu_free(struct rcu_head *rcu_head)
{
        blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
}

void __blkg_release(struct blkio_group *blkg)
{
        /* release the extra blkcg reference this blkg has been holding */
        css_put(&blkg->blkcg->css);

        /*
         * A group is freed in RCU manner. But having an RCU read lock does
         * not mean that one can access all the fields of blkg and assume
         * these are valid. For example, don't try to follow throtl_data
         * and request queue links.
         *
         * Having a reference to blkg under an RCU read lock allows access
         * only to values local to the group, like group stats and group
         * rate limits.
         */
        call_rcu(&blkg->rcu_head, blkg_rcu_free);
}
EXPORT_SYMBOL_GPL(__blkg_release);
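
/*
 * An illustrative reader-side sketch (an assumption about typical usage,
 * not code from this file): with only the RCU read lock held, a blkg
 * returned by blkg_lookup() may be used for group-local data such as the
 * stats in blkg->pd[plid]->pdata, but blkg->q must not be followed.
 *
 *      rcu_read_lock();
 *      blkg = blkg_lookup(blkcg, q);
 *      if (blkg)
 *              ... read group-local values from blkg->pd[plid]->pdata ...
 *      rcu_read_unlock();
 */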
static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
        struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
        struct blkio_group *blkg;
        struct hlist_node *n;
        int i;

        mutex_lock(&blkcg_pol_mutex);
        spin_lock_irq(&blkcg->lock);

        /*
         * Note that stat reset is racy - it doesn't synchronize against
         * stat updates. This is a debug feature which shouldn't exist
         * anyway. If you get hit by a race, retry.
         */
        hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
                for (i = 0; i < BLKIO_NR_POLICIES; i++) {
                        struct blkio_policy_type *pol = blkio_policy[i];

                        if (pol && pol->ops.blkio_reset_group_stats_fn)
                                pol->ops.blkio_reset_group_stats_fn(blkg);
                }
        }

        spin_unlock_irq(&blkcg->lock);
        mutex_unlock(&blkcg_pol_mutex);
        return 0;
}

static const char *blkg_dev_name(struct blkio_group *blkg)
{
        /* some drivers (floppy) instantiate a queue w/o disk registered */
        if (blkg->q->backing_dev_info.dev)
                return dev_name(blkg->q->backing_dev_info.dev);
        return NULL;
}

/**
 * blkcg_print_blkgs - helper for printing per-blkg data
 * @sf: seq_file to print to
 * @blkcg: blkcg of interest
 * @prfill: fill function to print out a blkg
 * @pol: policy in question
 * @data: data to be passed to @prfill
 * @show_total: whether to print the sum of the @prfill return values
 *
 * This function invokes @prfill on each blkg of @blkcg if pd for the
 * policy specified by @pol exists. @prfill is invoked with @sf, the
 * policy data and @data. If @show_total is %true, the sum of the return
 * values from @prfill is printed with a "Total" label at the end.
 *
 * This is to be used to construct print functions for the
 * cftype->read_seq_string method.
 */
void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg,
                       u64 (*prfill)(struct seq_file *, void *, int),
                       int pol, int data, bool show_total)
{
        struct blkio_group *blkg;
        struct hlist_node *n;
        u64 total = 0;

        spin_lock_irq(&blkcg->lock);
        hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
                if (blkg->pd[pol])
                        total += prfill(sf, blkg->pd[pol]->pdata, data);
        spin_unlock_irq(&blkcg->lock);

        if (show_total)
                seq_printf(sf, "Total %llu\n", (unsigned long long)total);
}
EXPORT_SYMBOL_GPL(blkcg_print_blkgs);
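
/*
 * A hypothetical read_seq_string method built from this helper and the
 * prfill callbacks defined below; the policy id, pdata layout and field
 * names (my_policy_id, struct my_pdata, my_stat) are made up for
 * illustration:
 *
 *      static int blkcg_print_my_stat(struct cgroup *cgrp,
 *                                     struct cftype *cft,
 *                                     struct seq_file *sf)
 *      {
 *              struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
 *
 *              blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, my_policy_id,
 *                                offsetof(struct my_pdata, my_stat), false);
 *              return 0;
 *      }
 */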
/**
 * __blkg_prfill_u64 - prfill helper for a single u64 value
 * @sf: seq_file to print to
 * @pdata: policy private data of interest
 * @v: value to print
 *
 * Print @v to @sf for the device associated with @pdata.
 */
u64 __blkg_prfill_u64(struct seq_file *sf, void *pdata, u64 v)
{
        const char *dname = blkg_dev_name(pdata_to_blkg(pdata));

        if (!dname)
                return 0;

        seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
        return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);

/**
 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
 * @sf: seq_file to print to
 * @pdata: policy private data of interest
 * @rwstat: rwstat to print
 *
 * Print @rwstat to @sf for the device associated with @pdata.
 */
u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata,
                         const struct blkg_rwstat *rwstat)
{
        static const char *rwstr[] = {
                [BLKG_RWSTAT_READ]      = "Read",
                [BLKG_RWSTAT_WRITE]     = "Write",
                [BLKG_RWSTAT_SYNC]      = "Sync",
                [BLKG_RWSTAT_ASYNC]     = "Async",
        };
        const char *dname = blkg_dev_name(pdata_to_blkg(pdata));
        u64 v;
        int i;

        if (!dname)
                return 0;

        for (i = 0; i < BLKG_RWSTAT_NR; i++)
                seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
                           (unsigned long long)rwstat->cnt[i]);

        v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
        seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
        return v;
}

/**
 * blkg_prfill_stat - prfill callback for blkg_stat
 * @sf: seq_file to print to
 * @pdata: policy private data of interest
 * @off: offset to the blkg_stat in @pdata
 *
 * prfill callback for printing a blkg_stat.
 */
u64 blkg_prfill_stat(struct seq_file *sf, void *pdata, int off)
{
        return __blkg_prfill_u64(sf, pdata, blkg_stat_read(pdata + off));
}
EXPORT_SYMBOL_GPL(blkg_prfill_stat);

/**
 * blkg_prfill_rwstat - prfill callback for blkg_rwstat
 * @sf: seq_file to print to
 * @pdata: policy private data of interest
 * @off: offset to the blkg_rwstat in @pdata
 *
 * prfill callback for printing a blkg_rwstat.
 */
u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off)
{
        struct blkg_rwstat rwstat = blkg_rwstat_read(pdata + off);

        return __blkg_prfill_rwstat(sf, pdata, &rwstat);
}
EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);

/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result. @ctx->blkg points to the blkg to be updated and @ctx->v the new
 * value. This function returns with the RCU read lock held and must be
 * paired with blkg_conf_finish().
 */
int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input,
                   struct blkg_conf_ctx *ctx)
        __acquires(rcu)
{
        struct gendisk *disk;
        struct blkio_group *blkg;
        unsigned int major, minor;
        unsigned long long v;
        int part, ret;

        if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
                return -EINVAL;

        disk = get_gendisk(MKDEV(major, minor), &part);
        if (!disk || part)
                return -EINVAL;

        rcu_read_lock();

        spin_lock_irq(disk->queue->queue_lock);
        blkg = blkg_lookup_create(blkcg, disk->queue, false);
        spin_unlock_irq(disk->queue->queue_lock);

        if (IS_ERR(blkg)) {
                ret = PTR_ERR(blkg);
                rcu_read_unlock();
                put_disk(disk);
                /*
                 * If the queue was bypassing, we should retry. Do so
                 * after a short msleep(). It isn't strictly necessary
                 * but the queue can be bypassing for some time and it's
                 * always nice to avoid busy looping.
                 */
                if (ret == -EBUSY) {
                        msleep(10);
                        ret = restart_syscall();
                }
                return ret;
        }

        ctx->disk = disk;
        ctx->blkg = blkg;
        ctx->v = v;
        return 0;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);

/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 *
 * Finish up after a per-blkg config update. This function must be paired
 * with blkg_conf_prep().
 */
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
        __releases(rcu)
{
        rcu_read_unlock();
        put_disk(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
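
/*
 * A hypothetical per-device config writer showing the required pairing of
 * blkg_conf_prep() and blkg_conf_finish(); the function name is
 * illustrative only. @buf carries "MAJ:MIN VAL" as parsed above.
 *
 *      static int blkcg_set_my_limit(struct cgroup *cgrp,
 *                                    struct cftype *cft, const char *buf)
 *      {
 *              struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
 *              struct blkg_conf_ctx ctx;
 *              int ret;
 *
 *              ret = blkg_conf_prep(blkcg, buf, &ctx);
 *              if (ret)
 *                      return ret;
 *
 *              ... apply ctx.v to ctx.blkg's policy data; the RCU read
 *              lock is held throughout ...
 *
 *              blkg_conf_finish(&ctx);
 *              return 0;
 *      }
 */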
struct cftype blkio_files[] = {
        {
                .name = "reset_stats",
                .write_u64 = blkiocg_reset_stats,
        },
        { }     /* terminate */
};

/**
 * blkiocg_pre_destroy - cgroup pre_destroy callback
 * @cgroup: cgroup of interest
 *
 * This function is called when @cgroup is about to go away and is
 * responsible for shooting down all blkgs associated with @cgroup.
 * blkgs should be removed while holding both q and blkcg locks. As the
 * blkcg lock is nested inside the q lock, this function performs reverse
 * double lock dancing.
 *
 * This is the blkcg counterpart of ioc_release_fn().
 */
static int blkiocg_pre_destroy(struct cgroup *cgroup)
{
        struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

        spin_lock_irq(&blkcg->lock);

        while (!hlist_empty(&blkcg->blkg_list)) {
                struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
                                                struct blkio_group, blkcg_node);
                struct request_queue *q = blkg->q;

                if (spin_trylock(q->queue_lock)) {
                        blkg_destroy(blkg);
                        spin_unlock(q->queue_lock);
                } else {
                        spin_unlock_irq(&blkcg->lock);
                        cpu_relax();
                        spin_lock_irq(&blkcg->lock);
                }
        }

        spin_unlock_irq(&blkcg->lock);
        return 0;
}

static void blkiocg_destroy(struct cgroup *cgroup)
{
        struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

        if (blkcg != &blkio_root_cgroup)
                kfree(blkcg);
}

static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup)
{
        static atomic64_t id_seq = ATOMIC64_INIT(0);
        struct blkio_cgroup *blkcg;
        struct cgroup *parent = cgroup->parent;

        if (!parent) {
                blkcg = &blkio_root_cgroup;
                goto done;
        }

        blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
        if (!blkcg)
                return ERR_PTR(-ENOMEM);

        blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
        blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
done:
        spin_lock_init(&blkcg->lock);
        INIT_HLIST_HEAD(&blkcg->blkg_list);

        return &blkcg->css;
}

/**
 * blkcg_init_queue - initialize blkcg part of request queue
 * @q: request_queue to initialize
 *
 * Called from blk_alloc_queue_node(). Responsible for initializing the
 * blkcg part of new request_queue @q.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int blkcg_init_queue(struct request_queue *q)
{
        int ret;

        might_sleep();

        ret = blk_throtl_init(q);
        if (ret)
                return ret;

        mutex_lock(&all_q_mutex);
        INIT_LIST_HEAD(&q->all_q_node);
        list_add_tail(&q->all_q_node, &all_q_list);
        mutex_unlock(&all_q_mutex);

        return 0;
}

/**
 * blkcg_drain_queue - drain blkcg part of request_queue
 * @q: request_queue to drain
 *
 * Called from blk_drain_queue(). Responsible for draining the blkcg part.
 */
void blkcg_drain_queue(struct request_queue *q)
{
        lockdep_assert_held(q->queue_lock);

        blk_throtl_drain(q);
}

/**
 * blkcg_exit_queue - exit and release blkcg part of request_queue
 * @q: request_queue being released
 *
 * Called from blk_release_queue(). Responsible for exiting the blkcg part.
 */
void blkcg_exit_queue(struct request_queue *q)
{
        mutex_lock(&all_q_mutex);
        list_del_init(&q->all_q_node);
        mutex_unlock(&all_q_mutex);

        blkg_destroy_all(q, true);

        blk_throtl_exit(q);
}

/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures. For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
        struct task_struct *task;
        struct io_context *ioc;
        int ret = 0;

        /* task_lock() is needed to avoid races with exit_io_context() */
        cgroup_taskset_for_each(task, cgrp, tset) {
                task_lock(task);
                ioc = task->io_context;
                if (ioc && atomic_read(&ioc->nr_tasks) > 1)
                        ret = -EINVAL;
                task_unlock(task);
                if (ret)
                        break;
        }
        return ret;
}

static void blkcg_bypass_start(void)
        __acquires(&all_q_mutex)
{
        struct request_queue *q;

        mutex_lock(&all_q_mutex);

        list_for_each_entry(q, &all_q_list, all_q_node) {
                blk_queue_bypass_start(q);
                blkg_destroy_all(q, false);
        }
}

static void blkcg_bypass_end(void)
        __releases(&all_q_mutex)
{
        struct request_queue *q;

        list_for_each_entry(q, &all_q_list, all_q_node)
                blk_queue_bypass_end(q);

        mutex_unlock(&all_q_mutex);
}

struct cgroup_subsys blkio_subsys = {
        .name = "blkio",
        .create = blkiocg_create,
        .can_attach = blkiocg_can_attach,
        .pre_destroy = blkiocg_pre_destroy,
        .destroy = blkiocg_destroy,
        .subsys_id = blkio_subsys_id,
        .base_cftypes = blkio_files,
        .module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);

void blkio_policy_register(struct blkio_policy_type *blkiop)
{
        struct request_queue *q;

        mutex_lock(&blkcg_pol_mutex);

        blkcg_bypass_start();

        BUG_ON(blkio_policy[blkiop->plid]);
        blkio_policy[blkiop->plid] = blkiop;
        list_for_each_entry(q, &all_q_list, all_q_node)
                update_root_blkg_pd(q, blkiop->plid);

        blkcg_bypass_end();

        if (blkiop->cftypes)
                WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes));

        mutex_unlock(&blkcg_pol_mutex);
}
EXPORT_SYMBOL_GPL(blkio_policy_register);
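
/*
 * A sketch of what a registering policy might look like, assuming only
 * the blkio_policy_type fields used throughout this file (ops, plid,
 * pdata_size, cftypes); all example_* names and the plid value are
 * hypothetical:
 *
 *      static struct blkio_policy_type blkio_policy_example = {
 *              .ops = {
 *                      .blkio_init_group_fn    = example_init_group,
 *                      .blkio_exit_group_fn    = example_exit_group,
 *              },
 *              .plid           = BLKIO_POLICY_EXAMPLE,
 *              .pdata_size     = sizeof(struct example_pdata),
 *              .cftypes        = example_files,
 *      };
 *
 *      blkio_policy_register(&blkio_policy_example);
 */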
void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
        struct request_queue *q;

        mutex_lock(&blkcg_pol_mutex);

        if (blkiop->cftypes)
                cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes);

        blkcg_bypass_start();

        BUG_ON(blkio_policy[blkiop->plid] != blkiop);
        blkio_policy[blkiop->plid] = NULL;
        list_for_each_entry(q, &all_q_list, all_q_node)
                update_root_blkg_pd(q, blkiop->plid);

        blkcg_bypass_end();

        mutex_unlock(&blkcg_pol_mutex);
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);
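
/*
 * Hypothetical module init/exit pairing for the registration sketch
 * above; register on load, unregister on unload:
 *
 *      static int __init example_init(void)
 *      {
 *              blkio_policy_register(&blkio_policy_example);
 *              return 0;
 *      }
 *
 *      static void __exit example_exit(void)
 *      {
 *              blkio_policy_unregister(&blkio_policy_example);
 *      }
 */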