deadline-iosched.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677
  1. /*
  2. * Deadline i/o scheduler.
  3. *
  4. * Copyright (C) 2002 Jens Axboe <axboe@suse.de>
  5. */
  6. #include <linux/kernel.h>
  7. #include <linux/fs.h>
  8. #include <linux/blkdev.h>
  9. #include <linux/elevator.h>
  10. #include <linux/bio.h>
  11. #include <linux/module.h>
  12. #include <linux/slab.h>
  13. #include <linux/init.h>
  14. #include <linux/compiler.h>
  15. #include <linux/rbtree.h>
  16. /*
  17. * See Documentation/block/deadline-iosched.txt
  18. */
  19. static const int read_expire = HZ / 2; /* max time before a read is submitted. */
  20. static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
  21. static const int writes_starved = 2; /* max times reads can starve a write */
  22. static const int fifo_batch = 16; /* # of sequential requests treated as one
  23. by the above parameters. For throughput. */
  24. struct deadline_data {
  25. /*
  26. * run time data
  27. */
  28. /*
  29. * requests (deadline_rq s) are present on both sort_list and fifo_list
  30. */
  31. struct rb_root sort_list[2];
  32. struct list_head fifo_list[2];
  33. /*
  34. * next in sort order. read, write or both are NULL
  35. */
  36. struct deadline_rq *next_drq[2];
  37. unsigned int batching; /* number of sequential requests made */
  38. sector_t last_sector; /* head position */
  39. unsigned int starved; /* times reads have starved writes */
  40. /*
  41. * settings that change how the i/o scheduler behaves
  42. */
  43. int fifo_expire[2];
  44. int fifo_batch;
  45. int writes_starved;
  46. int front_merges;
  47. mempool_t *drq_pool;
  48. };
  49. /*
  50. * pre-request data.
  51. */
  52. struct deadline_rq {
  53. /*
  54. * rbtree index, key is the starting offset
  55. */
  56. struct rb_node rb_node;
  57. sector_t rb_key;
  58. struct request *request;
  59. /*
  60. * expire fifo
  61. */
  62. struct list_head fifo;
  63. unsigned long expires;
  64. };
  65. static void deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq);
  66. static kmem_cache_t *drq_pool;
  67. #define RQ_DATA(rq) ((struct deadline_rq *) (rq)->elevator_private)
  68. /*
  69. * rb tree support functions
  70. */
  71. #define rb_entry_drq(node) rb_entry((node), struct deadline_rq, rb_node)
  72. #define DRQ_RB_ROOT(dd, drq) (&(dd)->sort_list[rq_data_dir((drq)->request)])
  73. #define rq_rb_key(rq) (rq)->sector
  74. static struct deadline_rq *
  75. __deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
  76. {
  77. struct rb_node **p = &DRQ_RB_ROOT(dd, drq)->rb_node;
  78. struct rb_node *parent = NULL;
  79. struct deadline_rq *__drq;
  80. while (*p) {
  81. parent = *p;
  82. __drq = rb_entry_drq(parent);
  83. if (drq->rb_key < __drq->rb_key)
  84. p = &(*p)->rb_left;
  85. else if (drq->rb_key > __drq->rb_key)
  86. p = &(*p)->rb_right;
  87. else
  88. return __drq;
  89. }
  90. rb_link_node(&drq->rb_node, parent, p);
  91. return NULL;
  92. }
  93. static void
  94. deadline_add_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
  95. {
  96. struct deadline_rq *__alias;
  97. drq->rb_key = rq_rb_key(drq->request);
  98. retry:
  99. __alias = __deadline_add_drq_rb(dd, drq);
  100. if (!__alias) {
  101. rb_insert_color(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
  102. return;
  103. }
  104. deadline_move_request(dd, __alias);
  105. goto retry;
  106. }
  107. static inline void
  108. deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
  109. {
  110. const int data_dir = rq_data_dir(drq->request);
  111. if (dd->next_drq[data_dir] == drq) {
  112. struct rb_node *rbnext = rb_next(&drq->rb_node);
  113. dd->next_drq[data_dir] = NULL;
  114. if (rbnext)
  115. dd->next_drq[data_dir] = rb_entry_drq(rbnext);
  116. }
  117. BUG_ON(!RB_EMPTY_NODE(&drq->rb_node));
  118. rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
  119. RB_CLEAR_NODE(&drq->rb_node);
  120. }
  121. static struct request *
  122. deadline_find_drq_rb(struct deadline_data *dd, sector_t sector, int data_dir)
  123. {
  124. struct rb_node *n = dd->sort_list[data_dir].rb_node;
  125. struct deadline_rq *drq;
  126. while (n) {
  127. drq = rb_entry_drq(n);
  128. if (sector < drq->rb_key)
  129. n = n->rb_left;
  130. else if (sector > drq->rb_key)
  131. n = n->rb_right;
  132. else
  133. return drq->request;
  134. }
  135. return NULL;
  136. }
  137. /*
  138. * deadline_find_first_drq finds the first (lowest sector numbered) request
  139. * for the specified data_dir. Used to sweep back to the start of the disk
  140. * (1-way elevator) after we process the last (highest sector) request.
  141. */
  142. static struct deadline_rq *
  143. deadline_find_first_drq(struct deadline_data *dd, int data_dir)
  144. {
  145. struct rb_node *n = dd->sort_list[data_dir].rb_node;
  146. for (;;) {
  147. if (n->rb_left == NULL)
  148. return rb_entry_drq(n);
  149. n = n->rb_left;
  150. }
  151. }
  152. /*
  153. * add drq to rbtree and fifo
  154. */
  155. static void
  156. deadline_add_request(struct request_queue *q, struct request *rq)
  157. {
  158. struct deadline_data *dd = q->elevator->elevator_data;
  159. struct deadline_rq *drq = RQ_DATA(rq);
  160. const int data_dir = rq_data_dir(drq->request);
  161. deadline_add_drq_rb(dd, drq);
  162. /*
  163. * set expire time (only used for reads) and add to fifo list
  164. */
  165. drq->expires = jiffies + dd->fifo_expire[data_dir];
  166. list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
  167. }
  168. /*
  169. * remove rq from rbtree and fifo.
  170. */
  171. static void deadline_remove_request(request_queue_t *q, struct request *rq)
  172. {
  173. struct deadline_rq *drq = RQ_DATA(rq);
  174. struct deadline_data *dd = q->elevator->elevator_data;
  175. list_del_init(&drq->fifo);
  176. deadline_del_drq_rb(dd, drq);
  177. }
  178. static int
  179. deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
  180. {
  181. struct deadline_data *dd = q->elevator->elevator_data;
  182. struct request *__rq;
  183. int ret;
  184. /*
  185. * check for front merge
  186. */
  187. if (dd->front_merges) {
  188. sector_t rb_key = bio->bi_sector + bio_sectors(bio);
  189. __rq = deadline_find_drq_rb(dd, rb_key, bio_data_dir(bio));
  190. if (__rq) {
  191. BUG_ON(rb_key != rq_rb_key(__rq));
  192. if (elv_rq_merge_ok(__rq, bio)) {
  193. ret = ELEVATOR_FRONT_MERGE;
  194. goto out;
  195. }
  196. }
  197. }
  198. return ELEVATOR_NO_MERGE;
  199. out:
  200. *req = __rq;
  201. return ret;
  202. }
  203. static void deadline_merged_request(request_queue_t *q, struct request *req)
  204. {
  205. struct deadline_data *dd = q->elevator->elevator_data;
  206. struct deadline_rq *drq = RQ_DATA(req);
  207. /*
  208. * if the merge was a front merge, we need to reposition request
  209. */
  210. if (rq_rb_key(req) != drq->rb_key) {
  211. deadline_del_drq_rb(dd, drq);
  212. deadline_add_drq_rb(dd, drq);
  213. }
  214. }
  215. static void
  216. deadline_merged_requests(request_queue_t *q, struct request *req,
  217. struct request *next)
  218. {
  219. struct deadline_data *dd = q->elevator->elevator_data;
  220. struct deadline_rq *drq = RQ_DATA(req);
  221. struct deadline_rq *dnext = RQ_DATA(next);
  222. BUG_ON(!drq);
  223. BUG_ON(!dnext);
  224. if (rq_rb_key(req) != drq->rb_key) {
  225. deadline_del_drq_rb(dd, drq);
  226. deadline_add_drq_rb(dd, drq);
  227. }
  228. /*
  229. * if dnext expires before drq, assign its expire time to drq
  230. * and move into dnext position (dnext will be deleted) in fifo
  231. */
  232. if (!list_empty(&drq->fifo) && !list_empty(&dnext->fifo)) {
  233. if (time_before(dnext->expires, drq->expires)) {
  234. list_move(&drq->fifo, &dnext->fifo);
  235. drq->expires = dnext->expires;
  236. }
  237. }
  238. /*
  239. * kill knowledge of next, this one is a goner
  240. */
  241. deadline_remove_request(q, next);
  242. }
  243. /*
  244. * move request from sort list to dispatch queue.
  245. */
  246. static inline void
  247. deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
  248. {
  249. request_queue_t *q = drq->request->q;
  250. deadline_remove_request(q, drq->request);
  251. elv_dispatch_add_tail(q, drq->request);
  252. }
  253. /*
  254. * move an entry to dispatch queue
  255. */
  256. static void
  257. deadline_move_request(struct deadline_data *dd, struct deadline_rq *drq)
  258. {
  259. const int data_dir = rq_data_dir(drq->request);
  260. struct rb_node *rbnext = rb_next(&drq->rb_node);
  261. dd->next_drq[READ] = NULL;
  262. dd->next_drq[WRITE] = NULL;
  263. if (rbnext)
  264. dd->next_drq[data_dir] = rb_entry_drq(rbnext);
  265. dd->last_sector = drq->request->sector + drq->request->nr_sectors;
  266. /*
  267. * take it off the sort and fifo list, move
  268. * to dispatch queue
  269. */
  270. deadline_move_to_dispatch(dd, drq);
  271. }
  272. #define list_entry_fifo(ptr) list_entry((ptr), struct deadline_rq, fifo)
  273. /*
  274. * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
  275. * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
  276. */
  277. static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
  278. {
  279. struct deadline_rq *drq = list_entry_fifo(dd->fifo_list[ddir].next);
  280. /*
  281. * drq is expired!
  282. */
  283. if (time_after(jiffies, drq->expires))
  284. return 1;
  285. return 0;
  286. }
  287. /*
  288. * deadline_dispatch_requests selects the best request according to
  289. * read/write expire, fifo_batch, etc
  290. */
  291. static int deadline_dispatch_requests(request_queue_t *q, int force)
  292. {
  293. struct deadline_data *dd = q->elevator->elevator_data;
  294. const int reads = !list_empty(&dd->fifo_list[READ]);
  295. const int writes = !list_empty(&dd->fifo_list[WRITE]);
  296. struct deadline_rq *drq;
  297. int data_dir;
  298. /*
  299. * batches are currently reads XOR writes
  300. */
  301. if (dd->next_drq[WRITE])
  302. drq = dd->next_drq[WRITE];
  303. else
  304. drq = dd->next_drq[READ];
  305. if (drq) {
  306. /* we have a "next request" */
  307. if (dd->last_sector != drq->request->sector)
  308. /* end the batch on a non sequential request */
  309. dd->batching += dd->fifo_batch;
  310. if (dd->batching < dd->fifo_batch)
  311. /* we are still entitled to batch */
  312. goto dispatch_request;
  313. }
  314. /*
  315. * at this point we are not running a batch. select the appropriate
  316. * data direction (read / write)
  317. */
  318. if (reads) {
  319. BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
  320. if (writes && (dd->starved++ >= dd->writes_starved))
  321. goto dispatch_writes;
  322. data_dir = READ;
  323. goto dispatch_find_request;
  324. }
  325. /*
  326. * there are either no reads or writes have been starved
  327. */
  328. if (writes) {
  329. dispatch_writes:
  330. BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));
  331. dd->starved = 0;
  332. data_dir = WRITE;
  333. goto dispatch_find_request;
  334. }
  335. return 0;
  336. dispatch_find_request:
  337. /*
  338. * we are not running a batch, find best request for selected data_dir
  339. */
  340. if (deadline_check_fifo(dd, data_dir)) {
  341. /* An expired request exists - satisfy it */
  342. dd->batching = 0;
  343. drq = list_entry_fifo(dd->fifo_list[data_dir].next);
  344. } else if (dd->next_drq[data_dir]) {
  345. /*
  346. * The last req was the same dir and we have a next request in
  347. * sort order. No expired requests so continue on from here.
  348. */
  349. drq = dd->next_drq[data_dir];
  350. } else {
  351. /*
  352. * The last req was the other direction or we have run out of
  353. * higher-sectored requests. Go back to the lowest sectored
  354. * request (1 way elevator) and start a new batch.
  355. */
  356. dd->batching = 0;
  357. drq = deadline_find_first_drq(dd, data_dir);
  358. }
  359. dispatch_request:
  360. /*
  361. * drq is the selected appropriate request.
  362. */
  363. dd->batching++;
  364. deadline_move_request(dd, drq);
  365. return 1;
  366. }
  367. static int deadline_queue_empty(request_queue_t *q)
  368. {
  369. struct deadline_data *dd = q->elevator->elevator_data;
  370. return list_empty(&dd->fifo_list[WRITE])
  371. && list_empty(&dd->fifo_list[READ]);
  372. }
  373. static struct request *
  374. deadline_former_request(request_queue_t *q, struct request *rq)
  375. {
  376. struct deadline_rq *drq = RQ_DATA(rq);
  377. struct rb_node *rbprev = rb_prev(&drq->rb_node);
  378. if (rbprev)
  379. return rb_entry_drq(rbprev)->request;
  380. return NULL;
  381. }
  382. static struct request *
  383. deadline_latter_request(request_queue_t *q, struct request *rq)
  384. {
  385. struct deadline_rq *drq = RQ_DATA(rq);
  386. struct rb_node *rbnext = rb_next(&drq->rb_node);
  387. if (rbnext)
  388. return rb_entry_drq(rbnext)->request;
  389. return NULL;
  390. }
  391. static void deadline_exit_queue(elevator_t *e)
  392. {
  393. struct deadline_data *dd = e->elevator_data;
  394. BUG_ON(!list_empty(&dd->fifo_list[READ]));
  395. BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
  396. mempool_destroy(dd->drq_pool);
  397. kfree(dd);
  398. }
  399. /*
  400. * initialize elevator private data (deadline_data), and alloc a drq for
  401. * each request on the free lists
  402. */
  403. static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
  404. {
  405. struct deadline_data *dd;
  406. if (!drq_pool)
  407. return NULL;
  408. dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
  409. if (!dd)
  410. return NULL;
  411. memset(dd, 0, sizeof(*dd));
  412. dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
  413. mempool_free_slab, drq_pool, q->node);
  414. if (!dd->drq_pool) {
  415. kfree(dd);
  416. return NULL;
  417. }
  418. INIT_LIST_HEAD(&dd->fifo_list[READ]);
  419. INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
  420. dd->sort_list[READ] = RB_ROOT;
  421. dd->sort_list[WRITE] = RB_ROOT;
  422. dd->fifo_expire[READ] = read_expire;
  423. dd->fifo_expire[WRITE] = write_expire;
  424. dd->writes_starved = writes_starved;
  425. dd->front_merges = 1;
  426. dd->fifo_batch = fifo_batch;
  427. return dd;
  428. }
  429. static void deadline_put_request(request_queue_t *q, struct request *rq)
  430. {
  431. struct deadline_data *dd = q->elevator->elevator_data;
  432. struct deadline_rq *drq = RQ_DATA(rq);
  433. mempool_free(drq, dd->drq_pool);
  434. rq->elevator_private = NULL;
  435. }
  436. static int
  437. deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
  438. gfp_t gfp_mask)
  439. {
  440. struct deadline_data *dd = q->elevator->elevator_data;
  441. struct deadline_rq *drq;
  442. drq = mempool_alloc(dd->drq_pool, gfp_mask);
  443. if (drq) {
  444. memset(drq, 0, sizeof(*drq));
  445. RB_CLEAR_NODE(&drq->rb_node);
  446. drq->request = rq;
  447. INIT_LIST_HEAD(&drq->fifo);
  448. rq->elevator_private = drq;
  449. return 0;
  450. }
  451. return 1;
  452. }
  453. /*
  454. * sysfs parts below
  455. */
  456. static ssize_t
  457. deadline_var_show(int var, char *page)
  458. {
  459. return sprintf(page, "%d\n", var);
  460. }
  461. static ssize_t
  462. deadline_var_store(int *var, const char *page, size_t count)
  463. {
  464. char *p = (char *) page;
  465. *var = simple_strtol(p, &p, 10);
  466. return count;
  467. }
  468. #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
  469. static ssize_t __FUNC(elevator_t *e, char *page) \
  470. { \
  471. struct deadline_data *dd = e->elevator_data; \
  472. int __data = __VAR; \
  473. if (__CONV) \
  474. __data = jiffies_to_msecs(__data); \
  475. return deadline_var_show(__data, (page)); \
  476. }
  477. SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
  478. SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
  479. SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
  480. SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
  481. SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
  482. #undef SHOW_FUNCTION
  483. #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
  484. static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
  485. { \
  486. struct deadline_data *dd = e->elevator_data; \
  487. int __data; \
  488. int ret = deadline_var_store(&__data, (page), count); \
  489. if (__data < (MIN)) \
  490. __data = (MIN); \
  491. else if (__data > (MAX)) \
  492. __data = (MAX); \
  493. if (__CONV) \
  494. *(__PTR) = msecs_to_jiffies(__data); \
  495. else \
  496. *(__PTR) = __data; \
  497. return ret; \
  498. }
  499. STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
  500. STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
  501. STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
  502. STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
  503. STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
  504. #undef STORE_FUNCTION
  505. #define DD_ATTR(name) \
  506. __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
  507. deadline_##name##_store)
  508. static struct elv_fs_entry deadline_attrs[] = {
  509. DD_ATTR(read_expire),
  510. DD_ATTR(write_expire),
  511. DD_ATTR(writes_starved),
  512. DD_ATTR(front_merges),
  513. DD_ATTR(fifo_batch),
  514. __ATTR_NULL
  515. };
  516. static struct elevator_type iosched_deadline = {
  517. .ops = {
  518. .elevator_merge_fn = deadline_merge,
  519. .elevator_merged_fn = deadline_merged_request,
  520. .elevator_merge_req_fn = deadline_merged_requests,
  521. .elevator_dispatch_fn = deadline_dispatch_requests,
  522. .elevator_add_req_fn = deadline_add_request,
  523. .elevator_queue_empty_fn = deadline_queue_empty,
  524. .elevator_former_req_fn = deadline_former_request,
  525. .elevator_latter_req_fn = deadline_latter_request,
  526. .elevator_set_req_fn = deadline_set_request,
  527. .elevator_put_req_fn = deadline_put_request,
  528. .elevator_init_fn = deadline_init_queue,
  529. .elevator_exit_fn = deadline_exit_queue,
  530. },
  531. .elevator_attrs = deadline_attrs,
  532. .elevator_name = "deadline",
  533. .elevator_owner = THIS_MODULE,
  534. };
  535. static int __init deadline_init(void)
  536. {
  537. int ret;
  538. drq_pool = kmem_cache_create("deadline_drq", sizeof(struct deadline_rq),
  539. 0, 0, NULL, NULL);
  540. if (!drq_pool)
  541. return -ENOMEM;
  542. ret = elv_register(&iosched_deadline);
  543. if (ret)
  544. kmem_cache_destroy(drq_pool);
  545. return ret;
  546. }
  547. static void __exit deadline_exit(void)
  548. {
  549. kmem_cache_destroy(drq_pool);
  550. elv_unregister(&iosched_deadline);
  551. }
  552. module_init(deadline_init);
  553. module_exit(deadline_exit);
  554. MODULE_AUTHOR("Jens Axboe");
  555. MODULE_LICENSE("GPL");
  556. MODULE_DESCRIPTION("deadline IO scheduler");