blk-cgroup.c

/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include "blk-cgroup.h"
#include "blk.h"

#define MAX_KEY_LEN 100

static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);

static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);

/* List of groups pending per cpu stats allocation */
static DEFINE_SPINLOCK(alloc_list_lock);
static LIST_HEAD(alloc_list);

static void blkio_stat_alloc_fn(struct work_struct *);
static DECLARE_DELAYED_WORK(blkio_stat_alloc_work, blkio_stat_alloc_fn);

struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);

static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];

struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkio_cgroup, css);
}

struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
{
	if (bio && bio->bi_css)
		return container_of(bio->bi_css, struct blkio_cgroup, css);
	return task_blkio_cgroup(current);
}
EXPORT_SYMBOL_GPL(bio_blkio_cgroup);
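/*
 * Usage sketch (illustrative, not compiled in): an I/O path that needs
 * the owning blkcg of a bio does the lookup under RCU, since the
 * returned blkcg is only guaranteed to stay valid while the RCU read
 * lock is held (see the WARN_ON_ONCE in blkg_lookup_create() below):
 *
 *	rcu_read_lock();
 *	blkcg = bio_blkio_cgroup(bio);
 *	... use blkcg, e.g. pass it to blkg_lookup_create() ...
 *	rcu_read_unlock();
 */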
/*
 * Worker for allocating per cpu stat for blk groups. This is scheduled on
 * the system_nrt_wq once there are some groups on the alloc_list waiting
 * for allocation.
 */
static void blkio_stat_alloc_fn(struct work_struct *work)
{
	static void *pcpu_stats[BLKIO_NR_POLICIES];
	struct delayed_work *dwork = to_delayed_work(work);
	struct blkio_group *blkg;
	int i;
	bool empty = false;

alloc_stats:
	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		if (pcpu_stats[i] != NULL)
			continue;

		pcpu_stats[i] = alloc_percpu(struct blkio_group_stats_cpu);

		/* Allocation failed. Try again after some time. */
		if (pcpu_stats[i] == NULL) {
			queue_delayed_work(system_nrt_wq, dwork,
						msecs_to_jiffies(10));
			return;
		}
	}

	spin_lock_irq(&blkio_list_lock);
	spin_lock(&alloc_list_lock);

	/* cgroup got deleted or queue exited. */
	if (!list_empty(&alloc_list)) {
		blkg = list_first_entry(&alloc_list, struct blkio_group,
						alloc_node);
		for (i = 0; i < BLKIO_NR_POLICIES; i++) {
			struct blkg_policy_data *pd = blkg->pd[i];

			if (blkio_policy[i] && pd && !pd->stats_cpu)
				swap(pd->stats_cpu, pcpu_stats[i]);
		}
		list_del_init(&blkg->alloc_node);
	}

	empty = list_empty(&alloc_list);

	spin_unlock(&alloc_list_lock);
	spin_unlock_irq(&blkio_list_lock);

	if (!empty)
		goto alloc_stats;
}
/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkio_group *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];
		struct blkg_policy_data *pd = blkg->pd[i];

		if (!pd)
			continue;

		if (pol && pol->ops.blkio_exit_group_fn)
			pol->ops.blkio_exit_group_fn(blkg);

		free_percpu(pd->stats_cpu);
		kfree(pd);
	}

	kfree(blkg);
}
/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 *
 * Allocate a new blkg associating @blkcg and @q.
 */
static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
				      struct request_queue *q)
{
	struct blkio_group *blkg;
	int i;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
	if (!blkg)
		return NULL;

	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	INIT_LIST_HEAD(&blkg->alloc_node);
	blkg->blkcg = blkcg;
	blkg->refcnt = 1;
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];
		struct blkg_policy_data *pd;

		if (!pol)
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
				  q->node);
		if (!pd) {
			blkg_free(blkg);
			return NULL;
		}

		blkg->pd[i] = pd;
		pd->blkg = blkg;
	}

	/* invoke per-policy init */
	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];

		if (pol)
			pol->ops.blkio_init_group_fn(blkg);
	}

	return blkg;
}
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       bool for_root)
	__releases(q->queue_lock) __acquires(q->queue_lock)
{
	struct blkio_group *blkg;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/*
	 * This could be the first entry point of blkcg implementation and
	 * we shouldn't allow anything to go through for a bypassing queue.
	 * The following can be removed if blkg lookup is guaranteed to
	 * fail on a bypassing queue.
	 */
	if (unlikely(blk_queue_bypass(q)) && !for_root)
		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);

	blkg = blkg_lookup(blkcg, q);
	if (blkg)
		return blkg;

	/* blkg holds a reference to blkcg */
	if (!css_tryget(&blkcg->css))
		return ERR_PTR(-EINVAL);

	/*
	 * Allocate and initialize.
	 */
	blkg = blkg_alloc(blkcg, q);

	/* did alloc fail? */
	if (unlikely(!blkg)) {
		blkg = ERR_PTR(-ENOMEM);
		goto out;
	}

	/* insert */
	spin_lock(&blkcg->lock);
	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	list_add(&blkg->q_node, &q->blkg_list);
	spin_unlock(&blkcg->lock);

	spin_lock(&alloc_list_lock);
	list_add(&blkg->alloc_node, &alloc_list);
	/* Queue per cpu stat allocation from worker thread. */
	queue_delayed_work(system_nrt_wq, &blkio_stat_alloc_work, 0);
	spin_unlock(&alloc_list_lock);
out:
	return blkg;
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);
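/*
 * Caller sketch (illustrative only): blkg_lookup_create() must be called
 * with the RCU read lock and @q->queue_lock held, and the result checked
 * with IS_ERR().  blkg_conf_prep() below is the in-tree instance of this
 * pattern; a caller looks roughly like:
 *
 *	rcu_read_lock();
 *	spin_lock_irq(q->queue_lock);
 *	blkg = blkg_lookup_create(blkcg, q, false);
 *	spin_unlock_irq(q->queue_lock);
 *	if (IS_ERR(blkg))
 *		... -EBUSY means the queue was bypassing; retry later ...
 *	rcu_read_unlock();
 */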
/* called under rcu_read_lock(). */
struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				struct request_queue *q)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
		if (blkg->q == q)
			return blkg;
	return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup);
static void blkg_destroy(struct blkio_group *blkg)
{
	struct request_queue *q = blkg->q;
	struct blkio_cgroup *blkcg = blkg->blkcg;

	lockdep_assert_held(q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something wrong if we are trying to remove same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	spin_lock(&alloc_list_lock);
	list_del_init(&blkg->alloc_node);
	spin_unlock(&alloc_list_lock);

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, group can be destroyed.
	 */
	blkg_put(blkg);
}
/*
 * XXX: This updates blkg policy data in-place for root blkg, which is
 * necessary across elevator switch and policy registration as root blkgs
 * aren't shot down.  This broken and racy implementation is temporary.
 * Eventually, blkg shoot down will be replaced by proper in-place update.
 */
void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
{
	struct blkio_policy_type *pol = blkio_policy[plid];
	struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
	struct blkg_policy_data *pd;

	if (!blkg)
		return;

	kfree(blkg->pd[plid]);
	blkg->pd[plid] = NULL;

	if (!pol)
		return;

	pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
	WARN_ON_ONCE(!pd);

	pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
	WARN_ON_ONCE(!pd->stats_cpu);

	blkg->pd[plid] = pd;
	pd->blkg = blkg;
	pol->ops.blkio_init_group_fn(blkg);
}
EXPORT_SYMBOL_GPL(update_root_blkg_pd);
/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 * @destroy_root: whether to destroy root blkg or not
 *
 * Destroy blkgs associated with @q.  If @destroy_root is %true, all are
 * destroyed; otherwise, root blkg is left alone.
 */
void blkg_destroy_all(struct request_queue *q, bool destroy_root)
{
	struct blkio_group *blkg, *n;

	spin_lock_irq(q->queue_lock);

	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkio_cgroup *blkcg = blkg->blkcg;

		/* skip root? */
		if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
			continue;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}

	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blkg_destroy_all);
static void blkg_rcu_free(struct rcu_head *rcu_head)
{
	blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
}

void __blkg_release(struct blkio_group *blkg)
{
	/* release the extra blkcg reference this blkg has been holding */
	css_put(&blkg->blkcg->css);

	/*
	 * A group is freed in rcu manner.  But having an rcu lock does not
	 * mean that one can access all the fields of blkg and assume these
	 * are valid.  For example, don't try to follow throtl_data and
	 * request queue links.
	 *
	 * Having a reference to blkg under an rcu allows access to only
	 * values local to groups like group stats and group rate limits.
	 */
	call_rcu(&blkg->rcu_head, blkg_rcu_free);
}
EXPORT_SYMBOL_GPL(__blkg_release);
static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
{
	struct blkg_policy_data *pd = blkg->pd[plid];
	int cpu;

	if (pd->stats_cpu == NULL)
		return;

	for_each_possible_cpu(cpu) {
		struct blkio_group_stats_cpu *sc =
			per_cpu_ptr(pd->stats_cpu, cpu);

		blkg_rwstat_reset(&sc->service_bytes);
		blkg_rwstat_reset(&sc->serviced);
	}
}

static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
	struct blkio_group *blkg;
	struct hlist_node *n;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);

	/*
	 * Note that stat reset is racy - it doesn't synchronize against
	 * stat updates.  This is a debug feature which shouldn't exist
	 * anyway.  If you get hit by a race, retry.
	 */
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		struct blkio_policy_type *pol;

		list_for_each_entry(pol, &blkio_list, list) {
			blkio_reset_stats_cpu(blkg, pol->plid);

			if (pol->ops.blkio_reset_group_stats_fn)
				pol->ops.blkio_reset_group_stats_fn(blkg);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}
static const char *blkg_dev_name(struct blkio_group *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info.dev)
		return dev_name(blkg->q->backing_dev_info.dev);
	return NULL;
}
/**
 * blkcg_print_blkgs - helper for printing per-blkg data
 * @sf: seq_file to print to
 * @blkcg: blkcg of interest
 * @prfill: fill function to print out a blkg
 * @pol: policy in question
 * @data: data to be passed to @prfill
 * @show_total: whether to print out the sum of prfill return values
 *
 * This function invokes @prfill on each blkg of @blkcg if pd for the
 * policy specified by @pol exists.  @prfill is invoked with @sf, the
 * policy data and @data.  If @show_total is %true, the sum of the return
 * values from @prfill is printed with "Total" label at the end.
 *
 * This is to be used to construct print functions for
 * cftype->read_seq_string method.
 */
void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg,
		       u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int),
		       int pol, int data, bool show_total)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	u64 total = 0;

	spin_lock_irq(&blkcg->lock);
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
		if (blkg->pd[pol])
			total += prfill(sf, blkg->pd[pol], data);
	spin_unlock_irq(&blkcg->lock);

	if (show_total)
		seq_printf(sf, "Total %llu\n", (unsigned long long)total);
}
EXPORT_SYMBOL_GPL(blkcg_print_blkgs);
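/*
 * Example (sketch, for a hypothetical policy): a read_seq_string handler
 * built on blkcg_print_blkgs() with a custom prfill callback.  The
 * helpers blkg_prfill_stat() and blkg_prfill_rwstat() below implement
 * the two common cases; a policy-specific pair would look roughly like:
 *
 *	static u64 foo_prfill(struct seq_file *sf,
 *			      struct blkg_policy_data *pd, int off)
 *	{
 *		... print one value pulled out of this policy's pdata ...
 *		return __blkg_prfill_u64(sf, pd,
 *				blkg_stat_read((void *)pd->pdata + off));
 *	}
 *
 *	static int foo_print(struct cgroup *cgrp, struct cftype *cft,
 *			     struct seq_file *sf)
 *	{
 *		blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp),
 *				  foo_prfill, BLKIO_POLICY_FOO,
 *				  cft->private, false);
 *		return 0;
 *	}
 *
 * BLKIO_POLICY_FOO and the foo_* names are illustrative, not part of
 * this file.
 */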
/**
 * __blkg_prfill_u64 - prfill helper for a single u64 value
 * @sf: seq_file to print to
 * @pd: policy data of interest
 * @v: value to print
 *
 * Print @v to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
{
	const char *dname = blkg_dev_name(pd->blkg);

	if (!dname)
		return 0;

	seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
/**
 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
 * @sf: seq_file to print to
 * @pd: policy data of interest
 * @rwstat: rwstat to print
 *
 * Print @rwstat to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat *rwstat)
{
	static const char *rwstr[] = {
		[BLKG_RWSTAT_READ]	= "Read",
		[BLKG_RWSTAT_WRITE]	= "Write",
		[BLKG_RWSTAT_SYNC]	= "Sync",
		[BLKG_RWSTAT_ASYNC]	= "Async",
	};
	const char *dname = blkg_dev_name(pd->blkg);
	u64 v;
	int i;

	if (!dname)
		return 0;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
			   (unsigned long long)rwstat->cnt[i]);

	v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
	seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
	return v;
}
static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
			    int off)
{
	return __blkg_prfill_u64(sf, pd,
				 blkg_stat_read((void *)pd->pdata + off));
}

static u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			      int off)
{
	struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd->pdata + off);

	return __blkg_prfill_rwstat(sf, pd, &rwstat);
}
/* print blkg_stat specified by BLKCG_STAT_PRIV() */
int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft,
		     struct seq_file *sf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat,
			  BLKCG_STAT_POL(cft->private),
			  BLKCG_STAT_OFF(cft->private), false);
	return 0;
}
EXPORT_SYMBOL_GPL(blkcg_print_stat);

/* print blkg_rwstat specified by BLKCG_STAT_PRIV() */
int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft,
		       struct seq_file *sf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat,
			  BLKCG_STAT_POL(cft->private),
			  BLKCG_STAT_OFF(cft->private), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkcg_print_rwstat);
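/*
 * Wiring sketch (illustrative): a policy would typically hook these up
 * in its cftype table, packing the policy id and the pdata offset of
 * the stat into cft->private via BLKCG_STAT_PRIV() (declared alongside
 * BLKCG_STAT_POL()/BLKCG_STAT_OFF() in blk-cgroup.h), e.g. for a
 * hypothetical "foo" policy with a struct foo_pdata:
 *
 *	{
 *		.name = "foo.io_serviced",
 *		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_FOO,
 *				offsetof(struct foo_pdata, serviced)),
 *		.read_seq_string = blkcg_print_rwstat,
 *	},
 *
 * BLKIO_POLICY_FOO and struct foo_pdata are made-up names for the sketch.
 */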
/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result.  @ctx->blkg points to the blkg to be updated and @ctx->v the new
 * value.  This function returns with RCU read locked and must be paired
 * with blkg_conf_finish().
 */
int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input,
		   struct blkg_conf_ctx *ctx)
	__acquires(rcu)
{
	struct gendisk *disk;
	struct blkio_group *blkg;
	unsigned int major, minor;
	unsigned long long v;
	int part, ret;

	if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
		return -EINVAL;

	disk = get_gendisk(MKDEV(major, minor), &part);
	if (!disk || part)
		return -EINVAL;

	rcu_read_lock();

	spin_lock_irq(disk->queue->queue_lock);
	blkg = blkg_lookup_create(blkcg, disk->queue, false);
	spin_unlock_irq(disk->queue->queue_lock);

	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		rcu_read_unlock();
		put_disk(disk);
		/*
		 * If queue was bypassing, we should retry.  Do so after a
		 * short msleep().  It isn't strictly necessary but queue
		 * can be bypassing for some time and it's always nice to
		 * avoid busy looping.
		 */
		if (ret == -EBUSY) {
			msleep(10);
			ret = restart_syscall();
		}
		return ret;
	}

	ctx->disk = disk;
	ctx->blkg = blkg;
	ctx->v = v;
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);
/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 *
 * Finish up after per-blkg config update.  This function must be paired
 * with blkg_conf_prep().
 */
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
	__releases(rcu)
{
	rcu_read_unlock();
	put_disk(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
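/*
 * Usage sketch (illustrative): a policy's write handler for a per-device
 * setting brackets the update with the prep/finish pair, e.g. for a
 * hypothetical "foo.weight_device" file:
 *
 *	static int foo_set_weight(struct cgroup *cgrp, struct cftype *cft,
 *				  const char *buf)
 *	{
 *		struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
 *		struct blkg_conf_ctx ctx;
 *		int ret;
 *
 *		ret = blkg_conf_prep(blkcg, buf, &ctx);
 *		if (ret)
 *			return ret;
 *
 *		... apply ctx.v to ctx.blkg's policy data ...
 *
 *		blkg_conf_finish(&ctx);
 *		return 0;
 *	}
 *
 * foo_set_weight is a made-up name for the sketch.
 */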
struct cftype blkio_files[] = {
	{
		.name = "reset_stats",
		.write_u64 = blkiocg_reset_stats,
	},
	{ }	/* terminate */
};
/**
 * blkiocg_pre_destroy - cgroup pre_destroy callback
 * @cgroup: cgroup of interest
 *
 * This function is called when @cgroup is about to go away and is
 * responsible for shooting down all blkgs associated with @cgroup.
 * blkgs should be removed while holding both q and blkcg locks.  As
 * blkcg lock is nested inside q lock, this function performs reverse
 * double lock dancing.
 *
 * This is the blkcg counterpart of ioc_release_fn().
 */
static int blkiocg_pre_destroy(struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

	spin_lock_irq(&blkcg->lock);

	while (!hlist_empty(&blkcg->blkg_list)) {
		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
						struct blkio_group, blkcg_node);
		struct request_queue *q = blkg->q;

		if (spin_trylock(q->queue_lock)) {
			blkg_destroy(blkg);
			spin_unlock(q->queue_lock);
		} else {
			spin_unlock_irq(&blkcg->lock);
			cpu_relax();
			spin_lock_irq(&blkcg->lock);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	return 0;
}
static void blkiocg_destroy(struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

	if (blkcg != &blkio_root_cgroup)
		kfree(blkcg);
}

static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup)
{
	static atomic64_t id_seq = ATOMIC64_INIT(0);
	struct blkio_cgroup *blkcg;
	struct cgroup *parent = cgroup->parent;

	if (!parent) {
		blkcg = &blkio_root_cgroup;
		goto done;
	}

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->weight = BLKIO_WEIGHT_DEFAULT;
	blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
done:
	spin_lock_init(&blkcg->lock);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	return &blkcg->css;
}
/**
 * blkcg_init_queue - initialize blkcg part of request queue
 * @q: request_queue to initialize
 *
 * Called from blk_alloc_queue_node().  Responsible for initializing blkcg
 * part of new request_queue @q.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int blkcg_init_queue(struct request_queue *q)
{
	int ret;

	might_sleep();

	ret = blk_throtl_init(q);
	if (ret)
		return ret;

	mutex_lock(&all_q_mutex);
	INIT_LIST_HEAD(&q->all_q_node);
	list_add_tail(&q->all_q_node, &all_q_list);
	mutex_unlock(&all_q_mutex);

	return 0;
}
/**
 * blkcg_drain_queue - drain blkcg part of request_queue
 * @q: request_queue to drain
 *
 * Called from blk_drain_queue().  Responsible for draining blkcg part.
 */
void blkcg_drain_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	blk_throtl_drain(q);
}

/**
 * blkcg_exit_queue - exit and release blkcg part of request_queue
 * @q: request_queue being released
 *
 * Called from blk_release_queue().  Responsible for exiting blkcg part.
 */
void blkcg_exit_queue(struct request_queue *q)
{
	mutex_lock(&all_q_mutex);
	list_del_init(&q->all_q_node);
	mutex_unlock(&all_q_mutex);

	blkg_destroy_all(q, true);

	blk_throtl_exit(q);
}
/*
 * We cannot support shared io contexts, as we have no means to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures.  For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	cgroup_taskset_for_each(task, cgrp, tset) {
		task_lock(task);
		ioc = task->io_context;
		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
			ret = -EINVAL;
		task_unlock(task);
		if (ret)
			break;
	}
	return ret;
}
static void blkcg_bypass_start(void)
	__acquires(&all_q_mutex)
{
	struct request_queue *q;

	mutex_lock(&all_q_mutex);

	list_for_each_entry(q, &all_q_list, all_q_node) {
		blk_queue_bypass_start(q);
		blkg_destroy_all(q, false);
	}
}

static void blkcg_bypass_end(void)
	__releases(&all_q_mutex)
{
	struct request_queue *q;

	list_for_each_entry(q, &all_q_list, all_q_node)
		blk_queue_bypass_end(q);

	mutex_unlock(&all_q_mutex);
}
struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.pre_destroy = blkiocg_pre_destroy,
	.destroy = blkiocg_destroy,
	.subsys_id = blkio_subsys_id,
	.base_cftypes = blkio_files,
	.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);
void blkio_policy_register(struct blkio_policy_type *blkiop)
{
	struct request_queue *q;

	blkcg_bypass_start();
	spin_lock(&blkio_list_lock);

	BUG_ON(blkio_policy[blkiop->plid]);
	blkio_policy[blkiop->plid] = blkiop;
	list_add_tail(&blkiop->list, &blkio_list);

	spin_unlock(&blkio_list_lock);
	list_for_each_entry(q, &all_q_list, all_q_node)
		update_root_blkg_pd(q, blkiop->plid);
	blkcg_bypass_end();

	if (blkiop->cftypes)
		WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes));
}
EXPORT_SYMBOL_GPL(blkio_policy_register);
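/*
 * Registration sketch (illustrative): a policy fills in a
 * blkio_policy_type and registers it at module init, e.g. with made-up
 * "foo" names:
 *
 *	static struct blkio_policy_type blkio_policy_foo = {
 *		.ops = {
 *			.blkio_init_group_fn = foo_init_group,
 *			.blkio_exit_group_fn = foo_exit_group,
 *			.blkio_reset_group_stats_fn = foo_reset_stats,
 *		},
 *		.plid = BLKIO_POLICY_FOO,
 *		.pdata_size = sizeof(struct foo_pdata),
 *		.cftypes = foo_files,
 *	};
 *
 *	blkio_policy_register(&blkio_policy_foo);
 *
 * and pairs it with blkio_policy_unregister() on module exit.
 */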
void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
	struct request_queue *q;

	if (blkiop->cftypes)
		cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes);

	blkcg_bypass_start();
	spin_lock(&blkio_list_lock);

	BUG_ON(blkio_policy[blkiop->plid] != blkiop);
	blkio_policy[blkiop->plid] = NULL;
	list_del_init(&blkiop->list);

	spin_unlock(&blkio_list_lock);
	list_for_each_entry(q, &all_q_list, all_q_node)
		update_root_blkg_pd(q, blkiop->plid);
	blkcg_bypass_end();
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);