blk-cgroup.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948
  1. /*
  2. * Common Block IO controller cgroup interface
  3. *
  4. * Based on ideas and code from CFQ, CFS and BFQ:
  5. * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
  6. *
  7. * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
  8. * Paolo Valente <paolo.valente@unimore.it>
  9. *
  10. * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
  11. * Nauman Rafique <nauman@google.com>
  12. */
  13. #include <linux/ioprio.h>
  14. #include <linux/kdev_t.h>
  15. #include <linux/module.h>
  16. #include <linux/err.h>
  17. #include <linux/blkdev.h>
  18. #include <linux/slab.h>
  19. #include <linux/genhd.h>
  20. #include <linux/delay.h>
  21. #include <linux/atomic.h>
  22. #include "blk-cgroup.h"
  23. #include "blk.h"
  24. #define MAX_KEY_LEN 100
  25. static DEFINE_SPINLOCK(blkio_list_lock);
  26. static LIST_HEAD(blkio_list);
  27. static DEFINE_MUTEX(all_q_mutex);
  28. static LIST_HEAD(all_q_list);
  29. /* List of groups pending per cpu stats allocation */
  30. static DEFINE_SPINLOCK(alloc_list_lock);
  31. static LIST_HEAD(alloc_list);
  32. static void blkio_stat_alloc_fn(struct work_struct *);
  33. static DECLARE_DELAYED_WORK(blkio_stat_alloc_work, blkio_stat_alloc_fn);
  34. struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
  35. EXPORT_SYMBOL_GPL(blkio_root_cgroup);
  36. static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];
  37. struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
  38. {
  39. return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
  40. struct blkio_cgroup, css);
  41. }
  42. EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
  43. static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
  44. {
  45. return container_of(task_subsys_state(tsk, blkio_subsys_id),
  46. struct blkio_cgroup, css);
  47. }
  48. struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
  49. {
  50. if (bio && bio->bi_css)
  51. return container_of(bio->bi_css, struct blkio_cgroup, css);
  52. return task_blkio_cgroup(current);
  53. }
  54. EXPORT_SYMBOL_GPL(bio_blkio_cgroup);
  55. /*
  56. * Worker for allocating per cpu stat for blk groups. This is scheduled on
  57. * the system_nrt_wq once there are some groups on the alloc_list waiting
  58. * for allocation.
  59. */
  60. static void blkio_stat_alloc_fn(struct work_struct *work)
  61. {
  62. static void *pcpu_stats[BLKIO_NR_POLICIES];
  63. struct delayed_work *dwork = to_delayed_work(work);
  64. struct blkio_group *blkg;
  65. int i;
  66. bool empty = false;
  67. alloc_stats:
  68. for (i = 0; i < BLKIO_NR_POLICIES; i++) {
  69. if (pcpu_stats[i] != NULL)
  70. continue;
  71. pcpu_stats[i] = alloc_percpu(struct blkio_group_stats_cpu);
  72. /* Allocation failed. Try again after some time. */
  73. if (pcpu_stats[i] == NULL) {
  74. queue_delayed_work(system_nrt_wq, dwork,
  75. msecs_to_jiffies(10));
  76. return;
  77. }
  78. }
  79. spin_lock_irq(&blkio_list_lock);
  80. spin_lock(&alloc_list_lock);
  81. /* cgroup got deleted or queue exited. */
  82. if (!list_empty(&alloc_list)) {
  83. blkg = list_first_entry(&alloc_list, struct blkio_group,
  84. alloc_node);
  85. for (i = 0; i < BLKIO_NR_POLICIES; i++) {
  86. struct blkg_policy_data *pd = blkg->pd[i];
  87. if (blkio_policy[i] && pd && !pd->stats_cpu)
  88. swap(pd->stats_cpu, pcpu_stats[i]);
  89. }
  90. list_del_init(&blkg->alloc_node);
  91. }
  92. empty = list_empty(&alloc_list);
  93. spin_unlock(&alloc_list_lock);
  94. spin_unlock_irq(&blkio_list_lock);
  95. if (!empty)
  96. goto alloc_stats;
  97. }
  98. /**
  99. * blkg_free - free a blkg
  100. * @blkg: blkg to free
  101. *
  102. * Free @blkg which may be partially allocated.
  103. */
  104. static void blkg_free(struct blkio_group *blkg)
  105. {
  106. int i;
  107. if (!blkg)
  108. return;
  109. for (i = 0; i < BLKIO_NR_POLICIES; i++) {
  110. struct blkg_policy_data *pd = blkg->pd[i];
  111. if (pd) {
  112. free_percpu(pd->stats_cpu);
  113. kfree(pd);
  114. }
  115. }
  116. kfree(blkg);
  117. }
  118. /**
  119. * blkg_alloc - allocate a blkg
  120. * @blkcg: block cgroup the new blkg is associated with
  121. * @q: request_queue the new blkg is associated with
  122. *
  123. * Allocate a new blkg assocating @blkcg and @q.
  124. */
  125. static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
  126. struct request_queue *q)
  127. {
  128. struct blkio_group *blkg;
  129. int i;
  130. /* alloc and init base part */
  131. blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
  132. if (!blkg)
  133. return NULL;
  134. blkg->q = q;
  135. INIT_LIST_HEAD(&blkg->q_node);
  136. INIT_LIST_HEAD(&blkg->alloc_node);
  137. blkg->blkcg = blkcg;
  138. blkg->refcnt = 1;
  139. cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
  140. for (i = 0; i < BLKIO_NR_POLICIES; i++) {
  141. struct blkio_policy_type *pol = blkio_policy[i];
  142. struct blkg_policy_data *pd;
  143. if (!pol)
  144. continue;
  145. /* alloc per-policy data and attach it to blkg */
  146. pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
  147. q->node);
  148. if (!pd) {
  149. blkg_free(blkg);
  150. return NULL;
  151. }
  152. blkg->pd[i] = pd;
  153. pd->blkg = blkg;
  154. }
  155. /* invoke per-policy init */
  156. for (i = 0; i < BLKIO_NR_POLICIES; i++) {
  157. struct blkio_policy_type *pol = blkio_policy[i];
  158. if (pol)
  159. pol->ops.blkio_init_group_fn(blkg);
  160. }
  161. return blkg;
  162. }
  163. struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
  164. struct request_queue *q,
  165. bool for_root)
  166. __releases(q->queue_lock) __acquires(q->queue_lock)
  167. {
  168. struct blkio_group *blkg;
  169. WARN_ON_ONCE(!rcu_read_lock_held());
  170. lockdep_assert_held(q->queue_lock);
  171. /*
  172. * This could be the first entry point of blkcg implementation and
  173. * we shouldn't allow anything to go through for a bypassing queue.
  174. * The following can be removed if blkg lookup is guaranteed to
  175. * fail on a bypassing queue.
  176. */
  177. if (unlikely(blk_queue_bypass(q)) && !for_root)
  178. return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
  179. blkg = blkg_lookup(blkcg, q);
  180. if (blkg)
  181. return blkg;
  182. /* blkg holds a reference to blkcg */
  183. if (!css_tryget(&blkcg->css))
  184. return ERR_PTR(-EINVAL);
  185. /*
  186. * Allocate and initialize.
  187. */
  188. blkg = blkg_alloc(blkcg, q);
  189. /* did alloc fail? */
  190. if (unlikely(!blkg)) {
  191. blkg = ERR_PTR(-ENOMEM);
  192. goto out;
  193. }
  194. /* insert */
  195. spin_lock(&blkcg->lock);
  196. hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
  197. list_add(&blkg->q_node, &q->blkg_list);
  198. spin_unlock(&blkcg->lock);
  199. spin_lock(&alloc_list_lock);
  200. list_add(&blkg->alloc_node, &alloc_list);
  201. /* Queue per cpu stat allocation from worker thread. */
  202. queue_delayed_work(system_nrt_wq, &blkio_stat_alloc_work, 0);
  203. spin_unlock(&alloc_list_lock);
  204. out:
  205. return blkg;
  206. }
  207. EXPORT_SYMBOL_GPL(blkg_lookup_create);
  208. /* called under rcu_read_lock(). */
  209. struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
  210. struct request_queue *q)
  211. {
  212. struct blkio_group *blkg;
  213. struct hlist_node *n;
  214. hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
  215. if (blkg->q == q)
  216. return blkg;
  217. return NULL;
  218. }
  219. EXPORT_SYMBOL_GPL(blkg_lookup);
  220. static void blkg_destroy(struct blkio_group *blkg)
  221. {
  222. struct request_queue *q = blkg->q;
  223. struct blkio_cgroup *blkcg = blkg->blkcg;
  224. lockdep_assert_held(q->queue_lock);
  225. lockdep_assert_held(&blkcg->lock);
  226. /* Something wrong if we are trying to remove same group twice */
  227. WARN_ON_ONCE(list_empty(&blkg->q_node));
  228. WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
  229. list_del_init(&blkg->q_node);
  230. hlist_del_init_rcu(&blkg->blkcg_node);
  231. spin_lock(&alloc_list_lock);
  232. list_del_init(&blkg->alloc_node);
  233. spin_unlock(&alloc_list_lock);
  234. /*
  235. * Put the reference taken at the time of creation so that when all
  236. * queues are gone, group can be destroyed.
  237. */
  238. blkg_put(blkg);
  239. }
  240. /*
  241. * XXX: This updates blkg policy data in-place for root blkg, which is
  242. * necessary across elevator switch and policy registration as root blkgs
  243. * aren't shot down. This broken and racy implementation is temporary.
  244. * Eventually, blkg shoot down will be replaced by proper in-place update.
  245. */
  246. void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
  247. {
  248. struct blkio_policy_type *pol = blkio_policy[plid];
  249. struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
  250. struct blkg_policy_data *pd;
  251. if (!blkg)
  252. return;
  253. kfree(blkg->pd[plid]);
  254. blkg->pd[plid] = NULL;
  255. if (!pol)
  256. return;
  257. pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
  258. WARN_ON_ONCE(!pd);
  259. pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
  260. WARN_ON_ONCE(!pd->stats_cpu);
  261. blkg->pd[plid] = pd;
  262. pd->blkg = blkg;
  263. pol->ops.blkio_init_group_fn(blkg);
  264. }
  265. EXPORT_SYMBOL_GPL(update_root_blkg_pd);
  266. /**
  267. * blkg_destroy_all - destroy all blkgs associated with a request_queue
  268. * @q: request_queue of interest
  269. * @destroy_root: whether to destroy root blkg or not
  270. *
  271. * Destroy blkgs associated with @q. If @destroy_root is %true, all are
  272. * destroyed; otherwise, root blkg is left alone.
  273. */
  274. void blkg_destroy_all(struct request_queue *q, bool destroy_root)
  275. {
  276. struct blkio_group *blkg, *n;
  277. spin_lock_irq(q->queue_lock);
  278. list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
  279. struct blkio_cgroup *blkcg = blkg->blkcg;
  280. /* skip root? */
  281. if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
  282. continue;
  283. spin_lock(&blkcg->lock);
  284. blkg_destroy(blkg);
  285. spin_unlock(&blkcg->lock);
  286. }
  287. spin_unlock_irq(q->queue_lock);
  288. }
  289. EXPORT_SYMBOL_GPL(blkg_destroy_all);
  290. static void blkg_rcu_free(struct rcu_head *rcu_head)
  291. {
  292. blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
  293. }
  294. void __blkg_release(struct blkio_group *blkg)
  295. {
  296. /* release the extra blkcg reference this blkg has been holding */
  297. css_put(&blkg->blkcg->css);
  298. /*
  299. * A group is freed in rcu manner. But having an rcu lock does not
  300. * mean that one can access all the fields of blkg and assume these
  301. * are valid. For example, don't try to follow throtl_data and
  302. * request queue links.
  303. *
  304. * Having a reference to blkg under an rcu allows acess to only
  305. * values local to groups like group stats and group rate limits
  306. */
  307. call_rcu(&blkg->rcu_head, blkg_rcu_free);
  308. }
  309. EXPORT_SYMBOL_GPL(__blkg_release);
  310. static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
  311. {
  312. struct blkg_policy_data *pd = blkg->pd[plid];
  313. int cpu;
  314. if (pd->stats_cpu == NULL)
  315. return;
  316. for_each_possible_cpu(cpu) {
  317. struct blkio_group_stats_cpu *sc =
  318. per_cpu_ptr(pd->stats_cpu, cpu);
  319. blkg_rwstat_reset(&sc->service_bytes);
  320. blkg_rwstat_reset(&sc->serviced);
  321. blkg_stat_reset(&sc->sectors);
  322. }
  323. }
  324. static int
  325. blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
  326. {
  327. struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
  328. struct blkio_group *blkg;
  329. struct hlist_node *n;
  330. spin_lock(&blkio_list_lock);
  331. spin_lock_irq(&blkcg->lock);
  332. /*
  333. * Note that stat reset is racy - it doesn't synchronize against
  334. * stat updates. This is a debug feature which shouldn't exist
  335. * anyway. If you get hit by a race, retry.
  336. */
  337. hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
  338. struct blkio_policy_type *pol;
  339. list_for_each_entry(pol, &blkio_list, list) {
  340. struct blkg_policy_data *pd = blkg->pd[pol->plid];
  341. struct blkio_group_stats *stats = &pd->stats;
  342. /* queued stats shouldn't be cleared */
  343. blkg_rwstat_reset(&stats->merged);
  344. blkg_rwstat_reset(&stats->service_time);
  345. blkg_rwstat_reset(&stats->wait_time);
  346. blkg_stat_reset(&stats->time);
  347. #ifdef CONFIG_DEBUG_BLK_CGROUP
  348. blkg_stat_reset(&stats->unaccounted_time);
  349. blkg_stat_reset(&stats->avg_queue_size_sum);
  350. blkg_stat_reset(&stats->avg_queue_size_samples);
  351. blkg_stat_reset(&stats->dequeue);
  352. blkg_stat_reset(&stats->group_wait_time);
  353. blkg_stat_reset(&stats->idle_time);
  354. blkg_stat_reset(&stats->empty_time);
  355. #endif
  356. blkio_reset_stats_cpu(blkg, pol->plid);
  357. }
  358. }
  359. spin_unlock_irq(&blkcg->lock);
  360. spin_unlock(&blkio_list_lock);
  361. return 0;
  362. }
  363. static const char *blkg_dev_name(struct blkio_group *blkg)
  364. {
  365. /* some drivers (floppy) instantiate a queue w/o disk registered */
  366. if (blkg->q->backing_dev_info.dev)
  367. return dev_name(blkg->q->backing_dev_info.dev);
  368. return NULL;
  369. }
  370. /**
  371. * blkcg_print_blkgs - helper for printing per-blkg data
  372. * @sf: seq_file to print to
  373. * @blkcg: blkcg of interest
  374. * @prfill: fill function to print out a blkg
  375. * @pol: policy in question
  376. * @data: data to be passed to @prfill
  377. * @show_total: to print out sum of prfill return values or not
  378. *
  379. * This function invokes @prfill on each blkg of @blkcg if pd for the
  380. * policy specified by @pol exists. @prfill is invoked with @sf, the
  381. * policy data and @data. If @show_total is %true, the sum of the return
  382. * values from @prfill is printed with "Total" label at the end.
  383. *
  384. * This is to be used to construct print functions for
  385. * cftype->read_seq_string method.
  386. */
  387. void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg,
  388. u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int),
  389. int pol, int data, bool show_total)
  390. {
  391. struct blkio_group *blkg;
  392. struct hlist_node *n;
  393. u64 total = 0;
  394. spin_lock_irq(&blkcg->lock);
  395. hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
  396. if (blkg->pd[pol])
  397. total += prfill(sf, blkg->pd[pol], data);
  398. spin_unlock_irq(&blkcg->lock);
  399. if (show_total)
  400. seq_printf(sf, "Total %llu\n", (unsigned long long)total);
  401. }
  402. EXPORT_SYMBOL_GPL(blkcg_print_blkgs);
  403. /**
  404. * __blkg_prfill_u64 - prfill helper for a single u64 value
  405. * @sf: seq_file to print to
  406. * @pd: policy data of interest
  407. * @v: value to print
  408. *
  409. * Print @v to @sf for the device assocaited with @pd.
  410. */
  411. u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
  412. {
  413. const char *dname = blkg_dev_name(pd->blkg);
  414. if (!dname)
  415. return 0;
  416. seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
  417. return v;
  418. }
  419. EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
  420. /**
  421. * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
  422. * @sf: seq_file to print to
  423. * @pd: policy data of interest
  424. * @rwstat: rwstat to print
  425. *
  426. * Print @rwstat to @sf for the device assocaited with @pd.
  427. */
  428. u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
  429. const struct blkg_rwstat *rwstat)
  430. {
  431. static const char *rwstr[] = {
  432. [BLKG_RWSTAT_READ] = "Read",
  433. [BLKG_RWSTAT_WRITE] = "Write",
  434. [BLKG_RWSTAT_SYNC] = "Sync",
  435. [BLKG_RWSTAT_ASYNC] = "Async",
  436. };
  437. const char *dname = blkg_dev_name(pd->blkg);
  438. u64 v;
  439. int i;
  440. if (!dname)
  441. return 0;
  442. for (i = 0; i < BLKG_RWSTAT_NR; i++)
  443. seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
  444. (unsigned long long)rwstat->cnt[i]);
  445. v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
  446. seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
  447. return v;
  448. }
  449. static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
  450. int off)
  451. {
  452. return __blkg_prfill_u64(sf, pd,
  453. blkg_stat_read((void *)&pd->stats + off));
  454. }
  455. static u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
  456. int off)
  457. {
  458. struct blkg_rwstat rwstat = blkg_rwstat_read((void *)&pd->stats + off);
  459. return __blkg_prfill_rwstat(sf, pd, &rwstat);
  460. }
  461. /* print blkg_stat specified by BLKCG_STAT_PRIV() */
  462. int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft,
  463. struct seq_file *sf)
  464. {
  465. struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
  466. blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat,
  467. BLKCG_STAT_POL(cft->private),
  468. BLKCG_STAT_OFF(cft->private), false);
  469. return 0;
  470. }
  471. EXPORT_SYMBOL_GPL(blkcg_print_stat);
  472. /* print blkg_rwstat specified by BLKCG_STAT_PRIV() */
  473. int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft,
  474. struct seq_file *sf)
  475. {
  476. struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
  477. blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat,
  478. BLKCG_STAT_POL(cft->private),
  479. BLKCG_STAT_OFF(cft->private), true);
  480. return 0;
  481. }
  482. EXPORT_SYMBOL_GPL(blkcg_print_rwstat);
  483. static u64 blkg_prfill_cpu_stat(struct seq_file *sf,
  484. struct blkg_policy_data *pd, int off)
  485. {
  486. u64 v = 0;
  487. int cpu;
  488. for_each_possible_cpu(cpu) {
  489. struct blkio_group_stats_cpu *sc =
  490. per_cpu_ptr(pd->stats_cpu, cpu);
  491. v += blkg_stat_read((void *)sc + off);
  492. }
  493. return __blkg_prfill_u64(sf, pd, v);
  494. }
  495. static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf,
  496. struct blkg_policy_data *pd, int off)
  497. {
  498. struct blkg_rwstat rwstat = { }, tmp;
  499. int i, cpu;
  500. for_each_possible_cpu(cpu) {
  501. struct blkio_group_stats_cpu *sc =
  502. per_cpu_ptr(pd->stats_cpu, cpu);
  503. tmp = blkg_rwstat_read((void *)sc + off);
  504. for (i = 0; i < BLKG_RWSTAT_NR; i++)
  505. rwstat.cnt[i] += tmp.cnt[i];
  506. }
  507. return __blkg_prfill_rwstat(sf, pd, &rwstat);
  508. }
  509. /* print per-cpu blkg_stat specified by BLKCG_STAT_PRIV() */
  510. int blkcg_print_cpu_stat(struct cgroup *cgrp, struct cftype *cft,
  511. struct seq_file *sf)
  512. {
  513. struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
  514. blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_stat,
  515. BLKCG_STAT_POL(cft->private),
  516. BLKCG_STAT_OFF(cft->private), false);
  517. return 0;
  518. }
  519. EXPORT_SYMBOL_GPL(blkcg_print_cpu_stat);
  520. /* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */
  521. int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft,
  522. struct seq_file *sf)
  523. {
  524. struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
  525. blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_rwstat,
  526. BLKCG_STAT_POL(cft->private),
  527. BLKCG_STAT_OFF(cft->private), true);
  528. return 0;
  529. }
  530. EXPORT_SYMBOL_GPL(blkcg_print_cpu_rwstat);
  531. /**
  532. * blkg_conf_prep - parse and prepare for per-blkg config update
  533. * @blkcg: target block cgroup
  534. * @input: input string
  535. * @ctx: blkg_conf_ctx to be filled
  536. *
  537. * Parse per-blkg config update from @input and initialize @ctx with the
  538. * result. @ctx->blkg points to the blkg to be updated and @ctx->v the new
  539. * value. This function returns with RCU read locked and must be paired
  540. * with blkg_conf_finish().
  541. */
  542. int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input,
  543. struct blkg_conf_ctx *ctx)
  544. __acquires(rcu)
  545. {
  546. struct gendisk *disk;
  547. struct blkio_group *blkg;
  548. unsigned int major, minor;
  549. unsigned long long v;
  550. int part, ret;
  551. if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
  552. return -EINVAL;
  553. disk = get_gendisk(MKDEV(major, minor), &part);
  554. if (!disk || part)
  555. return -EINVAL;
  556. rcu_read_lock();
  557. spin_lock_irq(disk->queue->queue_lock);
  558. blkg = blkg_lookup_create(blkcg, disk->queue, false);
  559. spin_unlock_irq(disk->queue->queue_lock);
  560. if (IS_ERR(blkg)) {
  561. ret = PTR_ERR(blkg);
  562. rcu_read_unlock();
  563. put_disk(disk);
  564. /*
  565. * If queue was bypassing, we should retry. Do so after a
  566. * short msleep(). It isn't strictly necessary but queue
  567. * can be bypassing for some time and it's always nice to
  568. * avoid busy looping.
  569. */
  570. if (ret == -EBUSY) {
  571. msleep(10);
  572. ret = restart_syscall();
  573. }
  574. return ret;
  575. }
  576. ctx->disk = disk;
  577. ctx->blkg = blkg;
  578. ctx->v = v;
  579. return 0;
  580. }
  581. EXPORT_SYMBOL_GPL(blkg_conf_prep);
  582. /**
  583. * blkg_conf_finish - finish up per-blkg config update
  584. * @ctx: blkg_conf_ctx intiailized by blkg_conf_prep()
  585. *
  586. * Finish up after per-blkg config update. This function must be paired
  587. * with blkg_conf_prep().
  588. */
  589. void blkg_conf_finish(struct blkg_conf_ctx *ctx)
  590. __releases(rcu)
  591. {
  592. rcu_read_unlock();
  593. put_disk(ctx->disk);
  594. }
  595. EXPORT_SYMBOL_GPL(blkg_conf_finish);
  596. struct cftype blkio_files[] = {
  597. {
  598. .name = "reset_stats",
  599. .write_u64 = blkiocg_reset_stats,
  600. },
  601. { } /* terminate */
  602. };
  603. /**
  604. * blkiocg_pre_destroy - cgroup pre_destroy callback
  605. * @cgroup: cgroup of interest
  606. *
  607. * This function is called when @cgroup is about to go away and responsible
  608. * for shooting down all blkgs associated with @cgroup. blkgs should be
  609. * removed while holding both q and blkcg locks. As blkcg lock is nested
  610. * inside q lock, this function performs reverse double lock dancing.
  611. *
  612. * This is the blkcg counterpart of ioc_release_fn().
  613. */
  614. static int blkiocg_pre_destroy(struct cgroup *cgroup)
  615. {
  616. struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
  617. spin_lock_irq(&blkcg->lock);
  618. while (!hlist_empty(&blkcg->blkg_list)) {
  619. struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
  620. struct blkio_group, blkcg_node);
  621. struct request_queue *q = blkg->q;
  622. if (spin_trylock(q->queue_lock)) {
  623. blkg_destroy(blkg);
  624. spin_unlock(q->queue_lock);
  625. } else {
  626. spin_unlock_irq(&blkcg->lock);
  627. cpu_relax();
  628. spin_lock_irq(&blkcg->lock);
  629. }
  630. }
  631. spin_unlock_irq(&blkcg->lock);
  632. return 0;
  633. }
  634. static void blkiocg_destroy(struct cgroup *cgroup)
  635. {
  636. struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
  637. if (blkcg != &blkio_root_cgroup)
  638. kfree(blkcg);
  639. }
  640. static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup)
  641. {
  642. static atomic64_t id_seq = ATOMIC64_INIT(0);
  643. struct blkio_cgroup *blkcg;
  644. struct cgroup *parent = cgroup->parent;
  645. if (!parent) {
  646. blkcg = &blkio_root_cgroup;
  647. goto done;
  648. }
  649. blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
  650. if (!blkcg)
  651. return ERR_PTR(-ENOMEM);
  652. blkcg->weight = BLKIO_WEIGHT_DEFAULT;
  653. blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
  654. done:
  655. spin_lock_init(&blkcg->lock);
  656. INIT_HLIST_HEAD(&blkcg->blkg_list);
  657. return &blkcg->css;
  658. }
  659. /**
  660. * blkcg_init_queue - initialize blkcg part of request queue
  661. * @q: request_queue to initialize
  662. *
  663. * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
  664. * part of new request_queue @q.
  665. *
  666. * RETURNS:
  667. * 0 on success, -errno on failure.
  668. */
  669. int blkcg_init_queue(struct request_queue *q)
  670. {
  671. int ret;
  672. might_sleep();
  673. ret = blk_throtl_init(q);
  674. if (ret)
  675. return ret;
  676. mutex_lock(&all_q_mutex);
  677. INIT_LIST_HEAD(&q->all_q_node);
  678. list_add_tail(&q->all_q_node, &all_q_list);
  679. mutex_unlock(&all_q_mutex);
  680. return 0;
  681. }
  682. /**
  683. * blkcg_drain_queue - drain blkcg part of request_queue
  684. * @q: request_queue to drain
  685. *
  686. * Called from blk_drain_queue(). Responsible for draining blkcg part.
  687. */
  688. void blkcg_drain_queue(struct request_queue *q)
  689. {
  690. lockdep_assert_held(q->queue_lock);
  691. blk_throtl_drain(q);
  692. }
  693. /**
  694. * blkcg_exit_queue - exit and release blkcg part of request_queue
  695. * @q: request_queue being released
  696. *
  697. * Called from blk_release_queue(). Responsible for exiting blkcg part.
  698. */
  699. void blkcg_exit_queue(struct request_queue *q)
  700. {
  701. mutex_lock(&all_q_mutex);
  702. list_del_init(&q->all_q_node);
  703. mutex_unlock(&all_q_mutex);
  704. blkg_destroy_all(q, true);
  705. blk_throtl_exit(q);
  706. }
  707. /*
  708. * We cannot support shared io contexts, as we have no mean to support
  709. * two tasks with the same ioc in two different groups without major rework
  710. * of the main cic data structures. For now we allow a task to change
  711. * its cgroup only if it's the only owner of its ioc.
  712. */
  713. static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
  714. {
  715. struct task_struct *task;
  716. struct io_context *ioc;
  717. int ret = 0;
  718. /* task_lock() is needed to avoid races with exit_io_context() */
  719. cgroup_taskset_for_each(task, cgrp, tset) {
  720. task_lock(task);
  721. ioc = task->io_context;
  722. if (ioc && atomic_read(&ioc->nr_tasks) > 1)
  723. ret = -EINVAL;
  724. task_unlock(task);
  725. if (ret)
  726. break;
  727. }
  728. return ret;
  729. }
  730. static void blkcg_bypass_start(void)
  731. __acquires(&all_q_mutex)
  732. {
  733. struct request_queue *q;
  734. mutex_lock(&all_q_mutex);
  735. list_for_each_entry(q, &all_q_list, all_q_node) {
  736. blk_queue_bypass_start(q);
  737. blkg_destroy_all(q, false);
  738. }
  739. }
  740. static void blkcg_bypass_end(void)
  741. __releases(&all_q_mutex)
  742. {
  743. struct request_queue *q;
  744. list_for_each_entry(q, &all_q_list, all_q_node)
  745. blk_queue_bypass_end(q);
  746. mutex_unlock(&all_q_mutex);
  747. }
  748. struct cgroup_subsys blkio_subsys = {
  749. .name = "blkio",
  750. .create = blkiocg_create,
  751. .can_attach = blkiocg_can_attach,
  752. .pre_destroy = blkiocg_pre_destroy,
  753. .destroy = blkiocg_destroy,
  754. .subsys_id = blkio_subsys_id,
  755. .base_cftypes = blkio_files,
  756. .module = THIS_MODULE,
  757. };
  758. EXPORT_SYMBOL_GPL(blkio_subsys);
  759. void blkio_policy_register(struct blkio_policy_type *blkiop)
  760. {
  761. struct request_queue *q;
  762. blkcg_bypass_start();
  763. spin_lock(&blkio_list_lock);
  764. BUG_ON(blkio_policy[blkiop->plid]);
  765. blkio_policy[blkiop->plid] = blkiop;
  766. list_add_tail(&blkiop->list, &blkio_list);
  767. spin_unlock(&blkio_list_lock);
  768. list_for_each_entry(q, &all_q_list, all_q_node)
  769. update_root_blkg_pd(q, blkiop->plid);
  770. blkcg_bypass_end();
  771. if (blkiop->cftypes)
  772. WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes));
  773. }
  774. EXPORT_SYMBOL_GPL(blkio_policy_register);
  775. void blkio_policy_unregister(struct blkio_policy_type *blkiop)
  776. {
  777. struct request_queue *q;
  778. if (blkiop->cftypes)
  779. cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes);
  780. blkcg_bypass_start();
  781. spin_lock(&blkio_list_lock);
  782. BUG_ON(blkio_policy[blkiop->plid] != blkiop);
  783. blkio_policy[blkiop->plid] = NULL;
  784. list_del_init(&blkiop->list);
  785. spin_unlock(&blkio_list_lock);
  786. list_for_each_entry(q, &all_q_list, all_q_node)
  787. update_root_blkg_pd(q, blkiop->plid);
  788. blkcg_bypass_end();
  789. }
  790. EXPORT_SYMBOL_GPL(blkio_policy_unregister);