blk-cgroup.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774
  1. /*
  2. * Common Block IO controller cgroup interface
  3. *
  4. * Based on ideas and code from CFQ, CFS and BFQ:
  5. * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
  6. *
  7. * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
  8. * Paolo Valente <paolo.valente@unimore.it>
  9. *
  10. * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
  11. * Nauman Rafique <nauman@google.com>
  12. */
  13. #include <linux/ioprio.h>
  14. #include <linux/kdev_t.h>
  15. #include <linux/module.h>
  16. #include <linux/err.h>
  17. #include <linux/blkdev.h>
  18. #include <linux/slab.h>
  19. #include <linux/genhd.h>
  20. #include <linux/delay.h>
  21. #include <linux/atomic.h>
  22. #include "blk-cgroup.h"
  23. #include "blk.h"
  24. #define MAX_KEY_LEN 100
  25. static DEFINE_MUTEX(blkcg_pol_mutex);
  26. static DEFINE_MUTEX(all_q_mutex);
  27. static LIST_HEAD(all_q_list);
  28. struct blkio_cgroup blkio_root_cgroup = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT };
  29. EXPORT_SYMBOL_GPL(blkio_root_cgroup);
  30. static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];
  31. struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
  32. {
  33. return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
  34. struct blkio_cgroup, css);
  35. }
  36. EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
  37. static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
  38. {
  39. return container_of(task_subsys_state(tsk, blkio_subsys_id),
  40. struct blkio_cgroup, css);
  41. }
  42. struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
  43. {
  44. if (bio && bio->bi_css)
  45. return container_of(bio->bi_css, struct blkio_cgroup, css);
  46. return task_blkio_cgroup(current);
  47. }
  48. EXPORT_SYMBOL_GPL(bio_blkio_cgroup);
  49. /**
  50. * blkg_free - free a blkg
  51. * @blkg: blkg to free
  52. *
  53. * Free @blkg which may be partially allocated.
  54. */
  55. static void blkg_free(struct blkio_group *blkg)
  56. {
  57. int i;
  58. if (!blkg)
  59. return;
  60. for (i = 0; i < BLKIO_NR_POLICIES; i++) {
  61. struct blkio_policy_type *pol = blkio_policy[i];
  62. struct blkg_policy_data *pd = blkg->pd[i];
  63. if (!pd)
  64. continue;
  65. if (pol && pol->ops.blkio_exit_group_fn)
  66. pol->ops.blkio_exit_group_fn(blkg);
  67. kfree(pd);
  68. }
  69. kfree(blkg);
  70. }
  71. /**
  72. * blkg_alloc - allocate a blkg
  73. * @blkcg: block cgroup the new blkg is associated with
  74. * @q: request_queue the new blkg is associated with
  75. *
  76. * Allocate a new blkg assocating @blkcg and @q.
  77. */
  78. static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
  79. struct request_queue *q)
  80. {
  81. struct blkio_group *blkg;
  82. int i;
  83. /* alloc and init base part */
  84. blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
  85. if (!blkg)
  86. return NULL;
  87. blkg->q = q;
  88. INIT_LIST_HEAD(&blkg->q_node);
  89. blkg->blkcg = blkcg;
  90. blkg->refcnt = 1;
  91. cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
  92. for (i = 0; i < BLKIO_NR_POLICIES; i++) {
  93. struct blkio_policy_type *pol = blkio_policy[i];
  94. struct blkg_policy_data *pd;
  95. if (!pol)
  96. continue;
  97. /* alloc per-policy data and attach it to blkg */
  98. pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
  99. q->node);
  100. if (!pd) {
  101. blkg_free(blkg);
  102. return NULL;
  103. }
  104. blkg->pd[i] = pd;
  105. pd->blkg = blkg;
  106. }
  107. /* invoke per-policy init */
  108. for (i = 0; i < BLKIO_NR_POLICIES; i++) {
  109. struct blkio_policy_type *pol = blkio_policy[i];
  110. if (pol)
  111. pol->ops.blkio_init_group_fn(blkg);
  112. }
  113. return blkg;
  114. }
  115. struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
  116. struct request_queue *q,
  117. bool for_root)
  118. __releases(q->queue_lock) __acquires(q->queue_lock)
  119. {
  120. struct blkio_group *blkg;
  121. WARN_ON_ONCE(!rcu_read_lock_held());
  122. lockdep_assert_held(q->queue_lock);
  123. /*
  124. * This could be the first entry point of blkcg implementation and
  125. * we shouldn't allow anything to go through for a bypassing queue.
  126. * The following can be removed if blkg lookup is guaranteed to
  127. * fail on a bypassing queue.
  128. */
  129. if (unlikely(blk_queue_bypass(q)) && !for_root)
  130. return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
  131. blkg = blkg_lookup(blkcg, q);
  132. if (blkg)
  133. return blkg;
  134. /* blkg holds a reference to blkcg */
  135. if (!css_tryget(&blkcg->css))
  136. return ERR_PTR(-EINVAL);
  137. /*
  138. * Allocate and initialize.
  139. */
  140. blkg = blkg_alloc(blkcg, q);
  141. /* did alloc fail? */
  142. if (unlikely(!blkg)) {
  143. blkg = ERR_PTR(-ENOMEM);
  144. goto out;
  145. }
  146. /* insert */
  147. spin_lock(&blkcg->lock);
  148. hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
  149. list_add(&blkg->q_node, &q->blkg_list);
  150. spin_unlock(&blkcg->lock);
  151. out:
  152. return blkg;
  153. }
  154. EXPORT_SYMBOL_GPL(blkg_lookup_create);
  155. /* called under rcu_read_lock(). */
  156. struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
  157. struct request_queue *q)
  158. {
  159. struct blkio_group *blkg;
  160. struct hlist_node *n;
  161. hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
  162. if (blkg->q == q)
  163. return blkg;
  164. return NULL;
  165. }
  166. EXPORT_SYMBOL_GPL(blkg_lookup);
  167. static void blkg_destroy(struct blkio_group *blkg)
  168. {
  169. struct request_queue *q = blkg->q;
  170. struct blkio_cgroup *blkcg = blkg->blkcg;
  171. lockdep_assert_held(q->queue_lock);
  172. lockdep_assert_held(&blkcg->lock);
  173. /* Something wrong if we are trying to remove same group twice */
  174. WARN_ON_ONCE(list_empty(&blkg->q_node));
  175. WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
  176. list_del_init(&blkg->q_node);
  177. hlist_del_init_rcu(&blkg->blkcg_node);
  178. /*
  179. * Put the reference taken at the time of creation so that when all
  180. * queues are gone, group can be destroyed.
  181. */
  182. blkg_put(blkg);
  183. }
  184. /*
  185. * XXX: This updates blkg policy data in-place for root blkg, which is
  186. * necessary across elevator switch and policy registration as root blkgs
  187. * aren't shot down. This broken and racy implementation is temporary.
  188. * Eventually, blkg shoot down will be replaced by proper in-place update.
  189. */
  190. void update_root_blkg_pd(struct request_queue *q,
  191. const struct blkio_policy_type *pol)
  192. {
  193. struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
  194. struct blkg_policy_data *pd;
  195. if (!blkg)
  196. return;
  197. kfree(blkg->pd[pol->plid]);
  198. blkg->pd[pol->plid] = NULL;
  199. if (!pol)
  200. return;
  201. pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
  202. WARN_ON_ONCE(!pd);
  203. blkg->pd[pol->plid] = pd;
  204. pd->blkg = blkg;
  205. pol->ops.blkio_init_group_fn(blkg);
  206. }
  207. EXPORT_SYMBOL_GPL(update_root_blkg_pd);
  208. /**
  209. * blkg_destroy_all - destroy all blkgs associated with a request_queue
  210. * @q: request_queue of interest
  211. * @destroy_root: whether to destroy root blkg or not
  212. *
  213. * Destroy blkgs associated with @q. If @destroy_root is %true, all are
  214. * destroyed; otherwise, root blkg is left alone.
  215. */
  216. void blkg_destroy_all(struct request_queue *q, bool destroy_root)
  217. {
  218. struct blkio_group *blkg, *n;
  219. spin_lock_irq(q->queue_lock);
  220. list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
  221. struct blkio_cgroup *blkcg = blkg->blkcg;
  222. /* skip root? */
  223. if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
  224. continue;
  225. spin_lock(&blkcg->lock);
  226. blkg_destroy(blkg);
  227. spin_unlock(&blkcg->lock);
  228. }
  229. spin_unlock_irq(q->queue_lock);
  230. }
  231. EXPORT_SYMBOL_GPL(blkg_destroy_all);
  232. static void blkg_rcu_free(struct rcu_head *rcu_head)
  233. {
  234. blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
  235. }
  236. void __blkg_release(struct blkio_group *blkg)
  237. {
  238. /* release the extra blkcg reference this blkg has been holding */
  239. css_put(&blkg->blkcg->css);
  240. /*
  241. * A group is freed in rcu manner. But having an rcu lock does not
  242. * mean that one can access all the fields of blkg and assume these
  243. * are valid. For example, don't try to follow throtl_data and
  244. * request queue links.
  245. *
  246. * Having a reference to blkg under an rcu allows acess to only
  247. * values local to groups like group stats and group rate limits
  248. */
  249. call_rcu(&blkg->rcu_head, blkg_rcu_free);
  250. }
  251. EXPORT_SYMBOL_GPL(__blkg_release);
  252. static int
  253. blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
  254. {
  255. struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
  256. struct blkio_group *blkg;
  257. struct hlist_node *n;
  258. int i;
  259. mutex_lock(&blkcg_pol_mutex);
  260. spin_lock_irq(&blkcg->lock);
  261. /*
  262. * Note that stat reset is racy - it doesn't synchronize against
  263. * stat updates. This is a debug feature which shouldn't exist
  264. * anyway. If you get hit by a race, retry.
  265. */
  266. hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
  267. for (i = 0; i < BLKIO_NR_POLICIES; i++) {
  268. struct blkio_policy_type *pol = blkio_policy[i];
  269. if (pol && pol->ops.blkio_reset_group_stats_fn)
  270. pol->ops.blkio_reset_group_stats_fn(blkg);
  271. }
  272. }
  273. spin_unlock_irq(&blkcg->lock);
  274. mutex_unlock(&blkcg_pol_mutex);
  275. return 0;
  276. }
  277. static const char *blkg_dev_name(struct blkio_group *blkg)
  278. {
  279. /* some drivers (floppy) instantiate a queue w/o disk registered */
  280. if (blkg->q->backing_dev_info.dev)
  281. return dev_name(blkg->q->backing_dev_info.dev);
  282. return NULL;
  283. }
  284. /**
  285. * blkcg_print_blkgs - helper for printing per-blkg data
  286. * @sf: seq_file to print to
  287. * @blkcg: blkcg of interest
  288. * @prfill: fill function to print out a blkg
  289. * @pol: policy in question
  290. * @data: data to be passed to @prfill
  291. * @show_total: to print out sum of prfill return values or not
  292. *
  293. * This function invokes @prfill on each blkg of @blkcg if pd for the
  294. * policy specified by @pol exists. @prfill is invoked with @sf, the
  295. * policy data and @data. If @show_total is %true, the sum of the return
  296. * values from @prfill is printed with "Total" label at the end.
  297. *
  298. * This is to be used to construct print functions for
  299. * cftype->read_seq_string method.
  300. */
  301. void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg,
  302. u64 (*prfill)(struct seq_file *, void *, int),
  303. const struct blkio_policy_type *pol, int data,
  304. bool show_total)
  305. {
  306. struct blkio_group *blkg;
  307. struct hlist_node *n;
  308. u64 total = 0;
  309. spin_lock_irq(&blkcg->lock);
  310. hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
  311. if (blkg->pd[pol->plid])
  312. total += prfill(sf, blkg->pd[pol->plid]->pdata, data);
  313. spin_unlock_irq(&blkcg->lock);
  314. if (show_total)
  315. seq_printf(sf, "Total %llu\n", (unsigned long long)total);
  316. }
  317. EXPORT_SYMBOL_GPL(blkcg_print_blkgs);
  318. /**
  319. * __blkg_prfill_u64 - prfill helper for a single u64 value
  320. * @sf: seq_file to print to
  321. * @pdata: policy private data of interest
  322. * @v: value to print
  323. *
  324. * Print @v to @sf for the device assocaited with @pdata.
  325. */
  326. u64 __blkg_prfill_u64(struct seq_file *sf, void *pdata, u64 v)
  327. {
  328. const char *dname = blkg_dev_name(pdata_to_blkg(pdata));
  329. if (!dname)
  330. return 0;
  331. seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
  332. return v;
  333. }
  334. EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
  335. /**
  336. * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
  337. * @sf: seq_file to print to
  338. * @pdata: policy private data of interest
  339. * @rwstat: rwstat to print
  340. *
  341. * Print @rwstat to @sf for the device assocaited with @pdata.
  342. */
  343. u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata,
  344. const struct blkg_rwstat *rwstat)
  345. {
  346. static const char *rwstr[] = {
  347. [BLKG_RWSTAT_READ] = "Read",
  348. [BLKG_RWSTAT_WRITE] = "Write",
  349. [BLKG_RWSTAT_SYNC] = "Sync",
  350. [BLKG_RWSTAT_ASYNC] = "Async",
  351. };
  352. const char *dname = blkg_dev_name(pdata_to_blkg(pdata));
  353. u64 v;
  354. int i;
  355. if (!dname)
  356. return 0;
  357. for (i = 0; i < BLKG_RWSTAT_NR; i++)
  358. seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
  359. (unsigned long long)rwstat->cnt[i]);
  360. v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
  361. seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
  362. return v;
  363. }
  364. /**
  365. * blkg_prfill_stat - prfill callback for blkg_stat
  366. * @sf: seq_file to print to
  367. * @pdata: policy private data of interest
  368. * @off: offset to the blkg_stat in @pdata
  369. *
  370. * prfill callback for printing a blkg_stat.
  371. */
  372. u64 blkg_prfill_stat(struct seq_file *sf, void *pdata, int off)
  373. {
  374. return __blkg_prfill_u64(sf, pdata, blkg_stat_read(pdata + off));
  375. }
  376. EXPORT_SYMBOL_GPL(blkg_prfill_stat);
  377. /**
  378. * blkg_prfill_rwstat - prfill callback for blkg_rwstat
  379. * @sf: seq_file to print to
  380. * @pdata: policy private data of interest
  381. * @off: offset to the blkg_rwstat in @pdata
  382. *
  383. * prfill callback for printing a blkg_rwstat.
  384. */
  385. u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off)
  386. {
  387. struct blkg_rwstat rwstat = blkg_rwstat_read(pdata + off);
  388. return __blkg_prfill_rwstat(sf, pdata, &rwstat);
  389. }
  390. EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
  391. /**
  392. * blkg_conf_prep - parse and prepare for per-blkg config update
  393. * @blkcg: target block cgroup
  394. * @input: input string
  395. * @ctx: blkg_conf_ctx to be filled
  396. *
  397. * Parse per-blkg config update from @input and initialize @ctx with the
  398. * result. @ctx->blkg points to the blkg to be updated and @ctx->v the new
  399. * value. This function returns with RCU read locked and must be paired
  400. * with blkg_conf_finish().
  401. */
  402. int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input,
  403. struct blkg_conf_ctx *ctx)
  404. __acquires(rcu)
  405. {
  406. struct gendisk *disk;
  407. struct blkio_group *blkg;
  408. unsigned int major, minor;
  409. unsigned long long v;
  410. int part, ret;
  411. if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
  412. return -EINVAL;
  413. disk = get_gendisk(MKDEV(major, minor), &part);
  414. if (!disk || part)
  415. return -EINVAL;
  416. rcu_read_lock();
  417. spin_lock_irq(disk->queue->queue_lock);
  418. blkg = blkg_lookup_create(blkcg, disk->queue, false);
  419. spin_unlock_irq(disk->queue->queue_lock);
  420. if (IS_ERR(blkg)) {
  421. ret = PTR_ERR(blkg);
  422. rcu_read_unlock();
  423. put_disk(disk);
  424. /*
  425. * If queue was bypassing, we should retry. Do so after a
  426. * short msleep(). It isn't strictly necessary but queue
  427. * can be bypassing for some time and it's always nice to
  428. * avoid busy looping.
  429. */
  430. if (ret == -EBUSY) {
  431. msleep(10);
  432. ret = restart_syscall();
  433. }
  434. return ret;
  435. }
  436. ctx->disk = disk;
  437. ctx->blkg = blkg;
  438. ctx->v = v;
  439. return 0;
  440. }
  441. EXPORT_SYMBOL_GPL(blkg_conf_prep);
  442. /**
  443. * blkg_conf_finish - finish up per-blkg config update
  444. * @ctx: blkg_conf_ctx intiailized by blkg_conf_prep()
  445. *
  446. * Finish up after per-blkg config update. This function must be paired
  447. * with blkg_conf_prep().
  448. */
  449. void blkg_conf_finish(struct blkg_conf_ctx *ctx)
  450. __releases(rcu)
  451. {
  452. rcu_read_unlock();
  453. put_disk(ctx->disk);
  454. }
  455. EXPORT_SYMBOL_GPL(blkg_conf_finish);
  456. struct cftype blkio_files[] = {
  457. {
  458. .name = "reset_stats",
  459. .write_u64 = blkiocg_reset_stats,
  460. },
  461. { } /* terminate */
  462. };
  463. /**
  464. * blkiocg_pre_destroy - cgroup pre_destroy callback
  465. * @cgroup: cgroup of interest
  466. *
  467. * This function is called when @cgroup is about to go away and responsible
  468. * for shooting down all blkgs associated with @cgroup. blkgs should be
  469. * removed while holding both q and blkcg locks. As blkcg lock is nested
  470. * inside q lock, this function performs reverse double lock dancing.
  471. *
  472. * This is the blkcg counterpart of ioc_release_fn().
  473. */
  474. static int blkiocg_pre_destroy(struct cgroup *cgroup)
  475. {
  476. struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
  477. spin_lock_irq(&blkcg->lock);
  478. while (!hlist_empty(&blkcg->blkg_list)) {
  479. struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
  480. struct blkio_group, blkcg_node);
  481. struct request_queue *q = blkg->q;
  482. if (spin_trylock(q->queue_lock)) {
  483. blkg_destroy(blkg);
  484. spin_unlock(q->queue_lock);
  485. } else {
  486. spin_unlock_irq(&blkcg->lock);
  487. cpu_relax();
  488. spin_lock_irq(&blkcg->lock);
  489. }
  490. }
  491. spin_unlock_irq(&blkcg->lock);
  492. return 0;
  493. }
  494. static void blkiocg_destroy(struct cgroup *cgroup)
  495. {
  496. struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
  497. if (blkcg != &blkio_root_cgroup)
  498. kfree(blkcg);
  499. }
  500. static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup)
  501. {
  502. static atomic64_t id_seq = ATOMIC64_INIT(0);
  503. struct blkio_cgroup *blkcg;
  504. struct cgroup *parent = cgroup->parent;
  505. if (!parent) {
  506. blkcg = &blkio_root_cgroup;
  507. goto done;
  508. }
  509. blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
  510. if (!blkcg)
  511. return ERR_PTR(-ENOMEM);
  512. blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
  513. blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
  514. done:
  515. spin_lock_init(&blkcg->lock);
  516. INIT_HLIST_HEAD(&blkcg->blkg_list);
  517. return &blkcg->css;
  518. }
  519. /**
  520. * blkcg_init_queue - initialize blkcg part of request queue
  521. * @q: request_queue to initialize
  522. *
  523. * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
  524. * part of new request_queue @q.
  525. *
  526. * RETURNS:
  527. * 0 on success, -errno on failure.
  528. */
  529. int blkcg_init_queue(struct request_queue *q)
  530. {
  531. int ret;
  532. might_sleep();
  533. ret = blk_throtl_init(q);
  534. if (ret)
  535. return ret;
  536. mutex_lock(&all_q_mutex);
  537. INIT_LIST_HEAD(&q->all_q_node);
  538. list_add_tail(&q->all_q_node, &all_q_list);
  539. mutex_unlock(&all_q_mutex);
  540. return 0;
  541. }
  542. /**
  543. * blkcg_drain_queue - drain blkcg part of request_queue
  544. * @q: request_queue to drain
  545. *
  546. * Called from blk_drain_queue(). Responsible for draining blkcg part.
  547. */
  548. void blkcg_drain_queue(struct request_queue *q)
  549. {
  550. lockdep_assert_held(q->queue_lock);
  551. blk_throtl_drain(q);
  552. }
  553. /**
  554. * blkcg_exit_queue - exit and release blkcg part of request_queue
  555. * @q: request_queue being released
  556. *
  557. * Called from blk_release_queue(). Responsible for exiting blkcg part.
  558. */
  559. void blkcg_exit_queue(struct request_queue *q)
  560. {
  561. mutex_lock(&all_q_mutex);
  562. list_del_init(&q->all_q_node);
  563. mutex_unlock(&all_q_mutex);
  564. blkg_destroy_all(q, true);
  565. blk_throtl_exit(q);
  566. }
  567. /*
  568. * We cannot support shared io contexts, as we have no mean to support
  569. * two tasks with the same ioc in two different groups without major rework
  570. * of the main cic data structures. For now we allow a task to change
  571. * its cgroup only if it's the only owner of its ioc.
  572. */
  573. static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
  574. {
  575. struct task_struct *task;
  576. struct io_context *ioc;
  577. int ret = 0;
  578. /* task_lock() is needed to avoid races with exit_io_context() */
  579. cgroup_taskset_for_each(task, cgrp, tset) {
  580. task_lock(task);
  581. ioc = task->io_context;
  582. if (ioc && atomic_read(&ioc->nr_tasks) > 1)
  583. ret = -EINVAL;
  584. task_unlock(task);
  585. if (ret)
  586. break;
  587. }
  588. return ret;
  589. }
  590. static void blkcg_bypass_start(void)
  591. __acquires(&all_q_mutex)
  592. {
  593. struct request_queue *q;
  594. mutex_lock(&all_q_mutex);
  595. list_for_each_entry(q, &all_q_list, all_q_node) {
  596. blk_queue_bypass_start(q);
  597. blkg_destroy_all(q, false);
  598. }
  599. }
  600. static void blkcg_bypass_end(void)
  601. __releases(&all_q_mutex)
  602. {
  603. struct request_queue *q;
  604. list_for_each_entry(q, &all_q_list, all_q_node)
  605. blk_queue_bypass_end(q);
  606. mutex_unlock(&all_q_mutex);
  607. }
  608. struct cgroup_subsys blkio_subsys = {
  609. .name = "blkio",
  610. .create = blkiocg_create,
  611. .can_attach = blkiocg_can_attach,
  612. .pre_destroy = blkiocg_pre_destroy,
  613. .destroy = blkiocg_destroy,
  614. .subsys_id = blkio_subsys_id,
  615. .base_cftypes = blkio_files,
  616. .module = THIS_MODULE,
  617. };
  618. EXPORT_SYMBOL_GPL(blkio_subsys);
  619. void blkio_policy_register(struct blkio_policy_type *blkiop)
  620. {
  621. struct request_queue *q;
  622. mutex_lock(&blkcg_pol_mutex);
  623. blkcg_bypass_start();
  624. BUG_ON(blkio_policy[blkiop->plid]);
  625. blkio_policy[blkiop->plid] = blkiop;
  626. list_for_each_entry(q, &all_q_list, all_q_node)
  627. update_root_blkg_pd(q, blkiop);
  628. blkcg_bypass_end();
  629. if (blkiop->cftypes)
  630. WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes));
  631. mutex_unlock(&blkcg_pol_mutex);
  632. }
  633. EXPORT_SYMBOL_GPL(blkio_policy_register);
  634. void blkio_policy_unregister(struct blkio_policy_type *blkiop)
  635. {
  636. struct request_queue *q;
  637. mutex_lock(&blkcg_pol_mutex);
  638. if (blkiop->cftypes)
  639. cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes);
  640. blkcg_bypass_start();
  641. BUG_ON(blkio_policy[blkiop->plid] != blkiop);
  642. blkio_policy[blkiop->plid] = NULL;
  643. list_for_each_entry(q, &all_q_list, all_q_node)
  644. update_root_blkg_pd(q, blkiop);
  645. blkcg_bypass_end();
  646. mutex_unlock(&blkcg_pol_mutex);
  647. }
  648. EXPORT_SYMBOL_GPL(blkio_policy_unregister);