blk-cgroup.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525
  1. #ifndef _BLK_CGROUP_H
  2. #define _BLK_CGROUP_H
  3. /*
  4. * Common Block IO controller cgroup interface
  5. *
  6. * Based on ideas and code from CFQ, CFS and BFQ:
  7. * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
  8. *
  9. * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
  10. * Paolo Valente <paolo.valente@unimore.it>
  11. *
  12. * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
  13. * Nauman Rafique <nauman@google.com>
  14. */
  15. #include <linux/cgroup.h>
  16. #include <linux/u64_stats_sync.h>
  17. enum blkio_policy_id {
  18. BLKIO_POLICY_PROP = 0, /* Proportional Bandwidth division */
  19. BLKIO_POLICY_THROTL, /* Throttling */
  20. BLKIO_NR_POLICIES,
  21. };
  22. /* Max limits for throttle policy */
  23. #define THROTL_IOPS_MAX UINT_MAX
  24. #ifdef CONFIG_BLK_CGROUP
  25. /* cft->private [un]packing for stat printing */
  26. #define BLKCG_STAT_PRIV(pol, off) (((unsigned)(pol) << 16) | (off))
  27. #define BLKCG_STAT_POL(prv) ((unsigned)(prv) >> 16)
  28. #define BLKCG_STAT_OFF(prv) ((unsigned)(prv) & 0xffff)
  29. enum blkg_rwstat_type {
  30. BLKG_RWSTAT_READ,
  31. BLKG_RWSTAT_WRITE,
  32. BLKG_RWSTAT_SYNC,
  33. BLKG_RWSTAT_ASYNC,
  34. BLKG_RWSTAT_NR,
  35. BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
  36. };
  37. /* blkg state flags */
  38. enum blkg_state_flags {
  39. BLKG_waiting = 0,
  40. BLKG_idling,
  41. BLKG_empty,
  42. };
  43. /* cgroup files owned by proportional weight policy */
  44. enum blkcg_file_name_prop {
  45. BLKIO_PROP_weight_device,
  46. };
  47. /* cgroup files owned by throttle policy */
  48. enum blkcg_file_name_throtl {
  49. BLKIO_THROTL_read_bps_device,
  50. BLKIO_THROTL_write_bps_device,
  51. BLKIO_THROTL_read_iops_device,
  52. BLKIO_THROTL_write_iops_device,
  53. };
  54. struct blkio_cgroup {
  55. struct cgroup_subsys_state css;
  56. unsigned int weight;
  57. spinlock_t lock;
  58. struct hlist_head blkg_list;
  59. /* for policies to test whether associated blkcg has changed */
  60. uint64_t id;
  61. };
  62. struct blkg_stat {
  63. struct u64_stats_sync syncp;
  64. uint64_t cnt;
  65. };
  66. struct blkg_rwstat {
  67. struct u64_stats_sync syncp;
  68. uint64_t cnt[BLKG_RWSTAT_NR];
  69. };
  70. struct blkio_group_stats {
  71. /* number of ios merged */
  72. struct blkg_rwstat merged;
  73. /* total time spent on device in ns, may not be accurate w/ queueing */
  74. struct blkg_rwstat service_time;
  75. /* total time spent waiting in scheduler queue in ns */
  76. struct blkg_rwstat wait_time;
  77. /* number of IOs queued up */
  78. struct blkg_rwstat queued;
  79. /* total disk time and nr sectors dispatched by this group */
  80. struct blkg_stat time;
  81. #ifdef CONFIG_DEBUG_BLK_CGROUP
  82. /* time not charged to this cgroup */
  83. struct blkg_stat unaccounted_time;
  84. /* sum of number of ios queued across all samples */
  85. struct blkg_stat avg_queue_size_sum;
  86. /* count of samples taken for average */
  87. struct blkg_stat avg_queue_size_samples;
  88. /* how many times this group has been removed from service tree */
  89. struct blkg_stat dequeue;
  90. /* total time spent waiting for it to be assigned a timeslice. */
  91. struct blkg_stat group_wait_time;
  92. /* time spent idling for this blkio_group */
  93. struct blkg_stat idle_time;
  94. /* total time with empty current active q with other requests queued */
  95. struct blkg_stat empty_time;
  96. /* fields after this shouldn't be cleared on stat reset */
  97. uint64_t start_group_wait_time;
  98. uint64_t start_idle_time;
  99. uint64_t start_empty_time;
  100. uint16_t flags;
  101. #endif
  102. };
  103. /* Per cpu blkio group stats */
  104. struct blkio_group_stats_cpu {
  105. /* total bytes transferred */
  106. struct blkg_rwstat service_bytes;
  107. /* total IOs serviced, post merge */
  108. struct blkg_rwstat serviced;
  109. /* total sectors transferred */
  110. struct blkg_stat sectors;
  111. };
  112. struct blkio_group_conf {
  113. unsigned int weight;
  114. u64 iops[2];
  115. u64 bps[2];
  116. };
  117. /* per-blkg per-policy data */
  118. struct blkg_policy_data {
  119. /* the blkg this per-policy data belongs to */
  120. struct blkio_group *blkg;
  121. /* Configuration */
  122. struct blkio_group_conf conf;
  123. struct blkio_group_stats stats;
  124. /* Per cpu stats pointer */
  125. struct blkio_group_stats_cpu __percpu *stats_cpu;
  126. /* pol->pdata_size bytes of private data used by policy impl */
  127. char pdata[] __aligned(__alignof__(unsigned long long));
  128. };
  129. struct blkio_group {
  130. /* Pointer to the associated request_queue */
  131. struct request_queue *q;
  132. struct list_head q_node;
  133. struct hlist_node blkcg_node;
  134. struct blkio_cgroup *blkcg;
  135. /* Store cgroup path */
  136. char path[128];
  137. /* reference count */
  138. int refcnt;
  139. struct blkg_policy_data *pd[BLKIO_NR_POLICIES];
  140. /* List of blkg waiting for per cpu stats memory to be allocated */
  141. struct list_head alloc_node;
  142. struct rcu_head rcu_head;
  143. };
  144. typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
  145. typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
  146. struct blkio_group *blkg, unsigned int weight);
  147. typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
  148. struct blkio_group *blkg, u64 read_bps);
  149. typedef void (blkio_update_group_write_bps_fn)(struct request_queue *q,
  150. struct blkio_group *blkg, u64 write_bps);
  151. typedef void (blkio_update_group_read_iops_fn)(struct request_queue *q,
  152. struct blkio_group *blkg, unsigned int read_iops);
  153. typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,
  154. struct blkio_group *blkg, unsigned int write_iops);
  155. struct blkio_policy_ops {
  156. blkio_init_group_fn *blkio_init_group_fn;
  157. blkio_update_group_weight_fn *blkio_update_group_weight_fn;
  158. blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
  159. blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
  160. blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;
  161. blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn;
  162. };
  163. struct blkio_policy_type {
  164. struct list_head list;
  165. struct blkio_policy_ops ops;
  166. enum blkio_policy_id plid;
  167. size_t pdata_size; /* policy specific private data size */
  168. };
  169. extern int blkcg_init_queue(struct request_queue *q);
  170. extern void blkcg_drain_queue(struct request_queue *q);
  171. extern void blkcg_exit_queue(struct request_queue *q);
  172. /* Blkio controller policy registration */
  173. extern void blkio_policy_register(struct blkio_policy_type *);
  174. extern void blkio_policy_unregister(struct blkio_policy_type *);
  175. extern void blkg_destroy_all(struct request_queue *q, bool destroy_root);
  176. extern void update_root_blkg_pd(struct request_queue *q,
  177. enum blkio_policy_id plid);
  178. /**
  179. * blkg_to_pdata - get policy private data
  180. * @blkg: blkg of interest
  181. * @pol: policy of interest
  182. *
  183. * Return pointer to private data associated with the @blkg-@pol pair.
  184. */
  185. static inline void *blkg_to_pdata(struct blkio_group *blkg,
  186. struct blkio_policy_type *pol)
  187. {
  188. return blkg ? blkg->pd[pol->plid]->pdata : NULL;
  189. }
  190. /**
  191. * pdata_to_blkg - get blkg associated with policy private data
  192. * @pdata: policy private data of interest
  193. *
  194. * @pdata is policy private data. Determine the blkg it's associated with.
  195. */
  196. static inline struct blkio_group *pdata_to_blkg(void *pdata)
  197. {
  198. if (pdata) {
  199. struct blkg_policy_data *pd =
  200. container_of(pdata, struct blkg_policy_data, pdata);
  201. return pd->blkg;
  202. }
  203. return NULL;
  204. }
  205. static inline char *blkg_path(struct blkio_group *blkg)
  206. {
  207. return blkg->path;
  208. }
  209. /**
  210. * blkg_get - get a blkg reference
  211. * @blkg: blkg to get
  212. *
  213. * The caller should be holding queue_lock and an existing reference.
  214. */
  215. static inline void blkg_get(struct blkio_group *blkg)
  216. {
  217. lockdep_assert_held(blkg->q->queue_lock);
  218. WARN_ON_ONCE(!blkg->refcnt);
  219. blkg->refcnt++;
  220. }
  221. void __blkg_release(struct blkio_group *blkg);
  222. /**
  223. * blkg_put - put a blkg reference
  224. * @blkg: blkg to put
  225. *
  226. * The caller should be holding queue_lock.
  227. */
  228. static inline void blkg_put(struct blkio_group *blkg)
  229. {
  230. lockdep_assert_held(blkg->q->queue_lock);
  231. WARN_ON_ONCE(blkg->refcnt <= 0);
  232. if (!--blkg->refcnt)
  233. __blkg_release(blkg);
  234. }
  235. /**
  236. * blkg_stat_add - add a value to a blkg_stat
  237. * @stat: target blkg_stat
  238. * @val: value to add
  239. *
  240. * Add @val to @stat. The caller is responsible for synchronizing calls to
  241. * this function.
  242. */
  243. static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
  244. {
  245. u64_stats_update_begin(&stat->syncp);
  246. stat->cnt += val;
  247. u64_stats_update_end(&stat->syncp);
  248. }
  249. /**
  250. * blkg_stat_read - read the current value of a blkg_stat
  251. * @stat: blkg_stat to read
  252. *
  253. * Read the current value of @stat. This function can be called without
  254. * synchroniztion and takes care of u64 atomicity.
  255. */
  256. static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
  257. {
  258. unsigned int start;
  259. uint64_t v;
  260. do {
  261. start = u64_stats_fetch_begin(&stat->syncp);
  262. v = stat->cnt;
  263. } while (u64_stats_fetch_retry(&stat->syncp, start));
  264. return v;
  265. }
  266. /**
  267. * blkg_stat_reset - reset a blkg_stat
  268. * @stat: blkg_stat to reset
  269. */
  270. static inline void blkg_stat_reset(struct blkg_stat *stat)
  271. {
  272. stat->cnt = 0;
  273. }
  274. /**
  275. * blkg_rwstat_add - add a value to a blkg_rwstat
  276. * @rwstat: target blkg_rwstat
  277. * @rw: mask of REQ_{WRITE|SYNC}
  278. * @val: value to add
  279. *
  280. * Add @val to @rwstat. The counters are chosen according to @rw. The
  281. * caller is responsible for synchronizing calls to this function.
  282. */
  283. static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
  284. int rw, uint64_t val)
  285. {
  286. u64_stats_update_begin(&rwstat->syncp);
  287. if (rw & REQ_WRITE)
  288. rwstat->cnt[BLKG_RWSTAT_WRITE] += val;
  289. else
  290. rwstat->cnt[BLKG_RWSTAT_READ] += val;
  291. if (rw & REQ_SYNC)
  292. rwstat->cnt[BLKG_RWSTAT_SYNC] += val;
  293. else
  294. rwstat->cnt[BLKG_RWSTAT_ASYNC] += val;
  295. u64_stats_update_end(&rwstat->syncp);
  296. }
  297. /**
  298. * blkg_rwstat_read - read the current values of a blkg_rwstat
  299. * @rwstat: blkg_rwstat to read
  300. *
  301. * Read the current snapshot of @rwstat and return it as the return value.
  302. * This function can be called without synchronization and takes care of
  303. * u64 atomicity.
  304. */
  305. static struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
  306. {
  307. unsigned int start;
  308. struct blkg_rwstat tmp;
  309. do {
  310. start = u64_stats_fetch_begin(&rwstat->syncp);
  311. tmp = *rwstat;
  312. } while (u64_stats_fetch_retry(&rwstat->syncp, start));
  313. return tmp;
  314. }
  315. /**
  316. * blkg_rwstat_sum - read the total count of a blkg_rwstat
  317. * @rwstat: blkg_rwstat to read
  318. *
  319. * Return the total count of @rwstat regardless of the IO direction. This
  320. * function can be called without synchronization and takes care of u64
  321. * atomicity.
  322. */
  323. static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat)
  324. {
  325. struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
  326. return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
  327. }
  328. /**
  329. * blkg_rwstat_reset - reset a blkg_rwstat
  330. * @rwstat: blkg_rwstat to reset
  331. */
  332. static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
  333. {
  334. memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
  335. }
  336. #else
  337. struct blkio_group {
  338. };
  339. struct blkio_policy_type {
  340. };
  341. static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
  342. static inline void blkcg_drain_queue(struct request_queue *q) { }
  343. static inline void blkcg_exit_queue(struct request_queue *q) { }
  344. static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
  345. static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
  346. static inline void blkg_destroy_all(struct request_queue *q,
  347. bool destory_root) { }
  348. static inline void update_root_blkg_pd(struct request_queue *q,
  349. enum blkio_policy_id plid) { }
  350. static inline void *blkg_to_pdata(struct blkio_group *blkg,
  351. struct blkio_policy_type *pol) { return NULL; }
  352. static inline struct blkio_group *pdata_to_blkg(void *pdata,
  353. struct blkio_policy_type *pol) { return NULL; }
  354. static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
  355. static inline void blkg_get(struct blkio_group *blkg) { }
  356. static inline void blkg_put(struct blkio_group *blkg) { }
  357. #endif
  358. #define BLKIO_WEIGHT_MIN 10
  359. #define BLKIO_WEIGHT_MAX 1000
  360. #define BLKIO_WEIGHT_DEFAULT 500
  361. #ifdef CONFIG_DEBUG_BLK_CGROUP
  362. void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
  363. struct blkio_policy_type *pol);
  364. void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
  365. struct blkio_policy_type *pol,
  366. unsigned long dequeue);
  367. void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
  368. struct blkio_policy_type *pol);
  369. void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
  370. struct blkio_policy_type *pol);
  371. void blkiocg_set_start_empty_time(struct blkio_group *blkg,
  372. struct blkio_policy_type *pol);
  373. #define BLKG_FLAG_FNS(name) \
  374. static inline void blkio_mark_blkg_##name( \
  375. struct blkio_group_stats *stats) \
  376. { \
  377. stats->flags |= (1 << BLKG_##name); \
  378. } \
  379. static inline void blkio_clear_blkg_##name( \
  380. struct blkio_group_stats *stats) \
  381. { \
  382. stats->flags &= ~(1 << BLKG_##name); \
  383. } \
  384. static inline int blkio_blkg_##name(struct blkio_group_stats *stats) \
  385. { \
  386. return (stats->flags & (1 << BLKG_##name)) != 0; \
  387. } \
  388. BLKG_FLAG_FNS(waiting)
  389. BLKG_FLAG_FNS(idling)
  390. BLKG_FLAG_FNS(empty)
  391. #undef BLKG_FLAG_FNS
  392. #else
  393. static inline void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
  394. struct blkio_policy_type *pol) { }
  395. static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
  396. struct blkio_policy_type *pol, unsigned long dequeue) { }
  397. static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
  398. struct blkio_policy_type *pol) { }
  399. static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
  400. struct blkio_policy_type *pol) { }
  401. static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg,
  402. struct blkio_policy_type *pol) { }
  403. #endif
  404. #ifdef CONFIG_BLK_CGROUP
  405. extern struct blkio_cgroup blkio_root_cgroup;
  406. extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
  407. extern struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio);
  408. extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
  409. struct request_queue *q);
  410. struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
  411. struct request_queue *q,
  412. bool for_root);
  413. void blkiocg_update_timeslice_used(struct blkio_group *blkg,
  414. struct blkio_policy_type *pol,
  415. unsigned long time,
  416. unsigned long unaccounted_time);
  417. void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
  418. struct blkio_policy_type *pol,
  419. uint64_t bytes, bool direction, bool sync);
  420. void blkiocg_update_completion_stats(struct blkio_group *blkg,
  421. struct blkio_policy_type *pol,
  422. uint64_t start_time,
  423. uint64_t io_start_time, bool direction,
  424. bool sync);
  425. void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
  426. struct blkio_policy_type *pol,
  427. bool direction, bool sync);
  428. void blkiocg_update_io_add_stats(struct blkio_group *blkg,
  429. struct blkio_policy_type *pol,
  430. struct blkio_group *curr_blkg, bool direction,
  431. bool sync);
  432. void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
  433. struct blkio_policy_type *pol,
  434. bool direction, bool sync);
  435. #else
  436. struct cgroup;
  437. static inline struct blkio_cgroup *
  438. cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
  439. static inline struct blkio_cgroup *
  440. bio_blkio_cgroup(struct bio *bio) { return NULL; }
  441. static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
  442. void *key) { return NULL; }
  443. static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
  444. struct blkio_policy_type *pol, unsigned long time,
  445. unsigned long unaccounted_time) { }
  446. static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
  447. struct blkio_policy_type *pol, uint64_t bytes,
  448. bool direction, bool sync) { }
  449. static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
  450. struct blkio_policy_type *pol, uint64_t start_time,
  451. uint64_t io_start_time, bool direction, bool sync) { }
  452. static inline void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
  453. struct blkio_policy_type *pol, bool direction,
  454. bool sync) { }
  455. static inline void blkiocg_update_io_add_stats(struct blkio_group *blkg,
  456. struct blkio_policy_type *pol,
  457. struct blkio_group *curr_blkg, bool direction,
  458. bool sync) { }
  459. static inline void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
  460. struct blkio_policy_type *pol, bool direction,
  461. bool sync) { }
  462. #endif
  463. #endif /* _BLK_CGROUP_H */