elevator.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918
/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@suse.de> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 * when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is right now
 * - completely modularize elevator setup and teardown
 *
 */
  25. #include <linux/kernel.h>
  26. #include <linux/fs.h>
  27. #include <linux/blkdev.h>
  28. #include <linux/elevator.h>
  29. #include <linux/bio.h>
  30. #include <linux/module.h>
  31. #include <linux/slab.h>
  32. #include <linux/init.h>
  33. #include <linux/compiler.h>
  34. #include <linux/delay.h>
  35. #include <linux/blktrace_api.h>
  36. #include <asm/uaccess.h>
  37. static DEFINE_SPINLOCK(elv_list_lock);
  38. static LIST_HEAD(elv_list);
  39. /*
  40. * can we safely merge with this request?
  41. */
  42. inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
  43. {
  44. if (!rq_mergeable(rq))
  45. return 0;
  46. /*
  47. * different data direction or already started, don't merge
  48. */
  49. if (bio_data_dir(bio) != rq_data_dir(rq))
  50. return 0;
  51. /*
  52. * same device and no special stuff set, merge is ok
  53. */
  54. if (rq->rq_disk == bio->bi_bdev->bd_disk &&
  55. !rq->waiting && !rq->special)
  56. return 1;
  57. return 0;
  58. }
  59. EXPORT_SYMBOL(elv_rq_merge_ok);
  60. static inline int elv_try_merge(struct request *__rq, struct bio *bio)
  61. {
  62. int ret = ELEVATOR_NO_MERGE;
  63. /*
  64. * we can merge and sequence is ok, check if it's possible
  65. */
  66. if (elv_rq_merge_ok(__rq, bio)) {
  67. if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
  68. ret = ELEVATOR_BACK_MERGE;
  69. else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
  70. ret = ELEVATOR_FRONT_MERGE;
  71. }
  72. return ret;
  73. }
  74. static struct elevator_type *elevator_find(const char *name)
  75. {
  76. struct elevator_type *e = NULL;
  77. struct list_head *entry;
  78. list_for_each(entry, &elv_list) {
  79. struct elevator_type *__e;
  80. __e = list_entry(entry, struct elevator_type, list);
  81. if (!strcmp(__e->elevator_name, name)) {
  82. e = __e;
  83. break;
  84. }
  85. }
  86. return e;
  87. }
  88. static void elevator_put(struct elevator_type *e)
  89. {
  90. module_put(e->elevator_owner);
  91. }
  92. static struct elevator_type *elevator_get(const char *name)
  93. {
  94. struct elevator_type *e;
  95. spin_lock_irq(&elv_list_lock);
  96. e = elevator_find(name);
  97. if (e && !try_module_get(e->elevator_owner))
  98. e = NULL;
  99. spin_unlock_irq(&elv_list_lock);
  100. return e;
  101. }
  102. static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
  103. {
  104. return eq->ops->elevator_init_fn(q, eq);
  105. }
  106. static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
  107. void *data)
  108. {
  109. q->elevator = eq;
  110. eq->elevator_data = data;
  111. }
  112. static char chosen_elevator[16];
  113. static int __init elevator_setup(char *str)
  114. {
  115. /*
  116. * Be backwards-compatible with previous kernels, so users
  117. * won't get the wrong elevator.
  118. */
  119. if (!strcmp(str, "as"))
  120. strcpy(chosen_elevator, "anticipatory");
  121. else
  122. strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
  123. return 1;
  124. }
  125. __setup("elevator=", elevator_setup);
  126. static struct kobj_type elv_ktype;
  127. static elevator_t *elevator_alloc(struct elevator_type *e)
  128. {
  129. elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
  130. if (eq) {
  131. memset(eq, 0, sizeof(*eq));
  132. eq->ops = &e->ops;
  133. eq->elevator_type = e;
  134. kobject_init(&eq->kobj);
  135. snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
  136. eq->kobj.ktype = &elv_ktype;
  137. mutex_init(&eq->sysfs_lock);
  138. } else {
  139. elevator_put(e);
  140. }
  141. return eq;
  142. }
  143. static void elevator_release(struct kobject *kobj)
  144. {
  145. elevator_t *e = container_of(kobj, elevator_t, kobj);
  146. elevator_put(e->elevator_type);
  147. kfree(e);
  148. }
  149. int elevator_init(request_queue_t *q, char *name)
  150. {
  151. struct elevator_type *e = NULL;
  152. struct elevator_queue *eq;
  153. int ret = 0;
  154. void *data;
  155. INIT_LIST_HEAD(&q->queue_head);
  156. q->last_merge = NULL;
  157. q->end_sector = 0;
  158. q->boundary_rq = NULL;
  159. if (name && !(e = elevator_get(name)))
  160. return -EINVAL;
  161. if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
  162. printk("I/O scheduler %s not found\n", chosen_elevator);
  163. if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
  164. printk("Default I/O scheduler not found, using no-op\n");
  165. e = elevator_get("noop");
  166. }
  167. eq = elevator_alloc(e);
  168. if (!eq)
  169. return -ENOMEM;
  170. data = elevator_init_queue(q, eq);
  171. if (!data) {
  172. kobject_put(&eq->kobj);
  173. return -ENOMEM;
  174. }
  175. elevator_attach(q, eq, data);
  176. return ret;
  177. }
  178. void elevator_exit(elevator_t *e)
  179. {
  180. mutex_lock(&e->sysfs_lock);
  181. if (e->ops->elevator_exit_fn)
  182. e->ops->elevator_exit_fn(e);
  183. e->ops = NULL;
  184. mutex_unlock(&e->sysfs_lock);
  185. kobject_put(&e->kobj);
  186. }
  187. /*
  188. * Insert rq into dispatch queue of q. Queue lock must be held on
  189. * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be
  190. * appended to the dispatch queue. To be used by specific elevators.
  191. */
  192. void elv_dispatch_sort(request_queue_t *q, struct request *rq)
  193. {
  194. sector_t boundary;
  195. struct list_head *entry;
  196. if (q->last_merge == rq)
  197. q->last_merge = NULL;
  198. q->nr_sorted--;
  199. boundary = q->end_sector;
  200. list_for_each_prev(entry, &q->queue_head) {
  201. struct request *pos = list_entry_rq(entry);
  202. if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
  203. break;
  204. if (rq->sector >= boundary) {
  205. if (pos->sector < boundary)
  206. continue;
  207. } else {
  208. if (pos->sector >= boundary)
  209. break;
  210. }
  211. if (rq->sector >= pos->sector)
  212. break;
  213. }
  214. list_add(&rq->queuelist, entry);
  215. }
  216. int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
  217. {
  218. elevator_t *e = q->elevator;
  219. int ret;
  220. if (q->last_merge) {
  221. ret = elv_try_merge(q->last_merge, bio);
  222. if (ret != ELEVATOR_NO_MERGE) {
  223. *req = q->last_merge;
  224. return ret;
  225. }
  226. }
  227. if (e->ops->elevator_merge_fn)
  228. return e->ops->elevator_merge_fn(q, req, bio);
  229. return ELEVATOR_NO_MERGE;
  230. }
  231. void elv_merged_request(request_queue_t *q, struct request *rq)
  232. {
  233. elevator_t *e = q->elevator;
  234. if (e->ops->elevator_merged_fn)
  235. e->ops->elevator_merged_fn(q, rq);
  236. q->last_merge = rq;
  237. }
  238. void elv_merge_requests(request_queue_t *q, struct request *rq,
  239. struct request *next)
  240. {
  241. elevator_t *e = q->elevator;
  242. if (e->ops->elevator_merge_req_fn)
  243. e->ops->elevator_merge_req_fn(q, rq, next);
  244. q->nr_sorted--;
  245. q->last_merge = rq;
  246. }
  247. void elv_requeue_request(request_queue_t *q, struct request *rq)
  248. {
  249. elevator_t *e = q->elevator;
  250. /*
  251. * it already went through dequeue, we need to decrement the
  252. * in_flight count again
  253. */
  254. if (blk_account_rq(rq)) {
  255. q->in_flight--;
  256. if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
  257. e->ops->elevator_deactivate_req_fn(q, rq);
  258. }
  259. rq->flags &= ~REQ_STARTED;
  260. elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
  261. }
  262. static void elv_drain_elevator(request_queue_t *q)
  263. {
  264. static int printed;
  265. while (q->elevator->ops->elevator_dispatch_fn(q, 1))
  266. ;
  267. if (q->nr_sorted == 0)
  268. return;
  269. if (printed++ < 10) {
  270. printk(KERN_ERR "%s: forced dispatching is broken "
  271. "(nr_sorted=%u), please report this\n",
  272. q->elevator->elevator_type->elevator_name, q->nr_sorted);
  273. }
  274. }
  275. void elv_insert(request_queue_t *q, struct request *rq, int where)
  276. {
  277. struct list_head *pos;
  278. unsigned ordseq;
  279. int unplug_it = 1;
  280. blk_add_trace_rq(q, rq, BLK_TA_INSERT);
  281. rq->q = q;
  282. switch (where) {
  283. case ELEVATOR_INSERT_FRONT:
  284. rq->flags |= REQ_SOFTBARRIER;
  285. list_add(&rq->queuelist, &q->queue_head);
  286. break;
  287. case ELEVATOR_INSERT_BACK:
  288. rq->flags |= REQ_SOFTBARRIER;
  289. elv_drain_elevator(q);
  290. list_add_tail(&rq->queuelist, &q->queue_head);
  291. /*
  292. * We kick the queue here for the following reasons.
  293. * - The elevator might have returned NULL previously
  294. * to delay requests and returned them now. As the
  295. * queue wasn't empty before this request, ll_rw_blk
  296. * won't run the queue on return, resulting in hang.
  297. * - Usually, back inserted requests won't be merged
  298. * with anything. There's no point in delaying queue
  299. * processing.
  300. */
  301. blk_remove_plug(q);
  302. q->request_fn(q);
  303. break;
  304. case ELEVATOR_INSERT_SORT:
  305. BUG_ON(!blk_fs_request(rq));
  306. rq->flags |= REQ_SORTED;
  307. q->nr_sorted++;
  308. if (q->last_merge == NULL && rq_mergeable(rq))
  309. q->last_merge = rq;
  310. /*
  311. * Some ioscheds (cfq) run q->request_fn directly, so
  312. * rq cannot be accessed after calling
  313. * elevator_add_req_fn.
  314. */
  315. q->elevator->ops->elevator_add_req_fn(q, rq);
  316. break;
  317. case ELEVATOR_INSERT_REQUEUE:
  318. /*
  319. * If ordered flush isn't in progress, we do front
  320. * insertion; otherwise, requests should be requeued
  321. * in ordseq order.
  322. */
  323. rq->flags |= REQ_SOFTBARRIER;
  324. if (q->ordseq == 0) {
  325. list_add(&rq->queuelist, &q->queue_head);
  326. break;
  327. }
  328. ordseq = blk_ordered_req_seq(rq);
  329. list_for_each(pos, &q->queue_head) {
  330. struct request *pos_rq = list_entry_rq(pos);
  331. if (ordseq <= blk_ordered_req_seq(pos_rq))
  332. break;
  333. }
  334. list_add_tail(&rq->queuelist, pos);
  335. /*
  336. * most requeues happen because of a busy condition, don't
  337. * force unplug of the queue for that case.
  338. */
  339. unplug_it = 0;
  340. break;
  341. default:
  342. printk(KERN_ERR "%s: bad insertion point %d\n",
  343. __FUNCTION__, where);
  344. BUG();
  345. }
  346. if (unplug_it && blk_queue_plugged(q)) {
  347. int nrq = q->rq.count[READ] + q->rq.count[WRITE]
  348. - q->in_flight;
  349. if (nrq >= q->unplug_thresh)
  350. __generic_unplug_device(q);
  351. }
  352. }
  353. void __elv_add_request(request_queue_t *q, struct request *rq, int where,
  354. int plug)
  355. {
  356. if (q->ordcolor)
  357. rq->flags |= REQ_ORDERED_COLOR;
  358. if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
  359. /*
  360. * toggle ordered color
  361. */
  362. if (blk_barrier_rq(rq))
  363. q->ordcolor ^= 1;
  364. /*
  365. * barriers implicitly indicate back insertion
  366. */
  367. if (where == ELEVATOR_INSERT_SORT)
  368. where = ELEVATOR_INSERT_BACK;
  369. /*
  370. * this request is scheduling boundary, update
  371. * end_sector
  372. */
  373. if (blk_fs_request(rq)) {
  374. q->end_sector = rq_end_sector(rq);
  375. q->boundary_rq = rq;
  376. }
  377. } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
  378. where = ELEVATOR_INSERT_BACK;
  379. if (plug)
  380. blk_plug_device(q);
  381. elv_insert(q, rq, where);
  382. }
  383. void elv_add_request(request_queue_t *q, struct request *rq, int where,
  384. int plug)
  385. {
  386. unsigned long flags;
  387. spin_lock_irqsave(q->queue_lock, flags);
  388. __elv_add_request(q, rq, where, plug);
  389. spin_unlock_irqrestore(q->queue_lock, flags);
  390. }
  391. static inline struct request *__elv_next_request(request_queue_t *q)
  392. {
  393. struct request *rq;
  394. while (1) {
  395. while (!list_empty(&q->queue_head)) {
  396. rq = list_entry_rq(q->queue_head.next);
  397. if (blk_do_ordered(q, &rq))
  398. return rq;
  399. }
  400. if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
  401. return NULL;
  402. }
  403. }
  404. struct request *elv_next_request(request_queue_t *q)
  405. {
  406. struct request *rq;
  407. int ret;
  408. while ((rq = __elv_next_request(q)) != NULL) {
  409. if (!(rq->flags & REQ_STARTED)) {
  410. elevator_t *e = q->elevator;
  411. /*
  412. * This is the first time the device driver
  413. * sees this request (possibly after
  414. * requeueing). Notify IO scheduler.
  415. */
  416. if (blk_sorted_rq(rq) &&
  417. e->ops->elevator_activate_req_fn)
  418. e->ops->elevator_activate_req_fn(q, rq);
  419. /*
  420. * just mark as started even if we don't start
  421. * it, a request that has been delayed should
  422. * not be passed by new incoming requests
  423. */
  424. rq->flags |= REQ_STARTED;
  425. blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
  426. }
  427. if (!q->boundary_rq || q->boundary_rq == rq) {
  428. q->end_sector = rq_end_sector(rq);
  429. q->boundary_rq = NULL;
  430. }
  431. if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
  432. break;
  433. ret = q->prep_rq_fn(q, rq);
  434. if (ret == BLKPREP_OK) {
  435. break;
  436. } else if (ret == BLKPREP_DEFER) {
  437. /*
  438. * the request may have been (partially) prepped.
  439. * we need to keep this request in the front to
  440. * avoid resource deadlock. REQ_STARTED will
  441. * prevent other fs requests from passing this one.
  442. */
  443. rq = NULL;
  444. break;
  445. } else if (ret == BLKPREP_KILL) {
  446. int nr_bytes = rq->hard_nr_sectors << 9;
  447. if (!nr_bytes)
  448. nr_bytes = rq->data_len;
  449. blkdev_dequeue_request(rq);
  450. rq->flags |= REQ_QUIET;
  451. end_that_request_chunk(rq, 0, nr_bytes);
  452. end_that_request_last(rq, 0);
  453. } else {
  454. printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
  455. ret);
  456. break;
  457. }
  458. }
  459. return rq;
  460. }
  461. void elv_dequeue_request(request_queue_t *q, struct request *rq)
  462. {
  463. BUG_ON(list_empty(&rq->queuelist));
  464. list_del_init(&rq->queuelist);
  465. /*
  466. * the time frame between a request being removed from the lists
  467. * and to it is freed is accounted as io that is in progress at
  468. * the driver side.
  469. */
  470. if (blk_account_rq(rq))
  471. q->in_flight++;
  472. }
  473. int elv_queue_empty(request_queue_t *q)
  474. {
  475. elevator_t *e = q->elevator;
  476. if (!list_empty(&q->queue_head))
  477. return 0;
  478. if (e->ops->elevator_queue_empty_fn)
  479. return e->ops->elevator_queue_empty_fn(q);
  480. return 1;
  481. }
  482. struct request *elv_latter_request(request_queue_t *q, struct request *rq)
  483. {
  484. elevator_t *e = q->elevator;
  485. if (e->ops->elevator_latter_req_fn)
  486. return e->ops->elevator_latter_req_fn(q, rq);
  487. return NULL;
  488. }
  489. struct request *elv_former_request(request_queue_t *q, struct request *rq)
  490. {
  491. elevator_t *e = q->elevator;
  492. if (e->ops->elevator_former_req_fn)
  493. return e->ops->elevator_former_req_fn(q, rq);
  494. return NULL;
  495. }
  496. int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
  497. gfp_t gfp_mask)
  498. {
  499. elevator_t *e = q->elevator;
  500. if (e->ops->elevator_set_req_fn)
  501. return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
  502. rq->elevator_private = NULL;
  503. return 0;
  504. }
  505. void elv_put_request(request_queue_t *q, struct request *rq)
  506. {
  507. elevator_t *e = q->elevator;
  508. if (e->ops->elevator_put_req_fn)
  509. e->ops->elevator_put_req_fn(q, rq);
  510. }
  511. int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
  512. {
  513. elevator_t *e = q->elevator;
  514. if (e->ops->elevator_may_queue_fn)
  515. return e->ops->elevator_may_queue_fn(q, rw, bio);
  516. return ELV_MQUEUE_MAY;
  517. }
  518. void elv_completed_request(request_queue_t *q, struct request *rq)
  519. {
  520. elevator_t *e = q->elevator;
  521. /*
  522. * request is released from the driver, io must be done
  523. */
  524. if (blk_account_rq(rq)) {
  525. q->in_flight--;
  526. if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
  527. e->ops->elevator_completed_req_fn(q, rq);
  528. }
  529. /*
  530. * Check if the queue is waiting for fs requests to be
  531. * drained for flush sequence.
  532. */
  533. if (unlikely(q->ordseq)) {
  534. struct request *first_rq = list_entry_rq(q->queue_head.next);
  535. if (q->in_flight == 0 &&
  536. blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
  537. blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
  538. blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
  539. q->request_fn(q);
  540. }
  541. }
  542. }
  543. #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
  544. static ssize_t
  545. elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
  546. {
  547. elevator_t *e = container_of(kobj, elevator_t, kobj);
  548. struct elv_fs_entry *entry = to_elv(attr);
  549. ssize_t error;
  550. if (!entry->show)
  551. return -EIO;
  552. mutex_lock(&e->sysfs_lock);
  553. error = e->ops ? entry->show(e, page) : -ENOENT;
  554. mutex_unlock(&e->sysfs_lock);
  555. return error;
  556. }
  557. static ssize_t
  558. elv_attr_store(struct kobject *kobj, struct attribute *attr,
  559. const char *page, size_t length)
  560. {
  561. elevator_t *e = container_of(kobj, elevator_t, kobj);
  562. struct elv_fs_entry *entry = to_elv(attr);
  563. ssize_t error;
  564. if (!entry->store)
  565. return -EIO;
  566. mutex_lock(&e->sysfs_lock);
  567. error = e->ops ? entry->store(e, page, length) : -ENOENT;
  568. mutex_unlock(&e->sysfs_lock);
  569. return error;
  570. }
  571. static struct sysfs_ops elv_sysfs_ops = {
  572. .show = elv_attr_show,
  573. .store = elv_attr_store,
  574. };
  575. static struct kobj_type elv_ktype = {
  576. .sysfs_ops = &elv_sysfs_ops,
  577. .release = elevator_release,
  578. };
  579. int elv_register_queue(struct request_queue *q)
  580. {
  581. elevator_t *e = q->elevator;
  582. int error;
  583. e->kobj.parent = &q->kobj;
  584. error = kobject_add(&e->kobj);
  585. if (!error) {
  586. struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
  587. if (attr) {
  588. while (attr->attr.name) {
  589. if (sysfs_create_file(&e->kobj, &attr->attr))
  590. break;
  591. attr++;
  592. }
  593. }
  594. kobject_uevent(&e->kobj, KOBJ_ADD);
  595. }
  596. return error;
  597. }
  598. static void __elv_unregister_queue(elevator_t *e)
  599. {
  600. kobject_uevent(&e->kobj, KOBJ_REMOVE);
  601. kobject_del(&e->kobj);
  602. }
  603. void elv_unregister_queue(struct request_queue *q)
  604. {
  605. if (q)
  606. __elv_unregister_queue(q->elevator);
  607. }
  608. int elv_register(struct elevator_type *e)
  609. {
  610. spin_lock_irq(&elv_list_lock);
  611. BUG_ON(elevator_find(e->elevator_name));
  612. list_add_tail(&e->list, &elv_list);
  613. spin_unlock_irq(&elv_list_lock);
  614. printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
  615. if (!strcmp(e->elevator_name, chosen_elevator) ||
  616. (!*chosen_elevator &&
  617. !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
  618. printk(" (default)");
  619. printk("\n");
  620. return 0;
  621. }
  622. EXPORT_SYMBOL_GPL(elv_register);
  623. void elv_unregister(struct elevator_type *e)
  624. {
  625. struct task_struct *g, *p;
  626. /*
  627. * Iterate every thread in the process to remove the io contexts.
  628. */
  629. if (e->ops.trim) {
  630. read_lock(&tasklist_lock);
  631. do_each_thread(g, p) {
  632. task_lock(p);
  633. if (p->io_context)
  634. e->ops.trim(p->io_context);
  635. task_unlock(p);
  636. } while_each_thread(g, p);
  637. read_unlock(&tasklist_lock);
  638. }
  639. spin_lock_irq(&elv_list_lock);
  640. list_del_init(&e->list);
  641. spin_unlock_irq(&elv_list_lock);
  642. }
  643. EXPORT_SYMBOL_GPL(elv_unregister);
  644. /*
  645. * switch to new_e io scheduler. be careful not to introduce deadlocks -
  646. * we don't free the old io scheduler, before we have allocated what we
  647. * need for the new one. this way we have a chance of going back to the old
  648. * one, if the new one fails init for some reason.
  649. */
  650. static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
  651. {
  652. elevator_t *old_elevator, *e;
  653. void *data;
  654. /*
  655. * Allocate new elevator
  656. */
  657. e = elevator_alloc(new_e);
  658. if (!e)
  659. return 0;
  660. data = elevator_init_queue(q, e);
  661. if (!data) {
  662. kobject_put(&e->kobj);
  663. return 0;
  664. }
  665. /*
  666. * Turn on BYPASS and drain all requests w/ elevator private data
  667. */
  668. spin_lock_irq(q->queue_lock);
  669. set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
  670. elv_drain_elevator(q);
  671. while (q->rq.elvpriv) {
  672. blk_remove_plug(q);
  673. q->request_fn(q);
  674. spin_unlock_irq(q->queue_lock);
  675. msleep(10);
  676. spin_lock_irq(q->queue_lock);
  677. elv_drain_elevator(q);
  678. }
  679. /*
  680. * Remember old elevator.
  681. */
  682. old_elevator = q->elevator;
  683. /*
  684. * attach and start new elevator
  685. */
  686. elevator_attach(q, e, data);
  687. spin_unlock_irq(q->queue_lock);
  688. __elv_unregister_queue(old_elevator);
  689. if (elv_register_queue(q))
  690. goto fail_register;
  691. /*
  692. * finally exit old elevator and turn off BYPASS.
  693. */
  694. elevator_exit(old_elevator);
  695. clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
  696. return 1;
  697. fail_register:
  698. /*
  699. * switch failed, exit the new io scheduler and reattach the old
  700. * one again (along with re-adding the sysfs dir)
  701. */
  702. elevator_exit(e);
  703. q->elevator = old_elevator;
  704. elv_register_queue(q);
  705. clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
  706. return 0;
  707. }
  708. ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
  709. {
  710. char elevator_name[ELV_NAME_MAX];
  711. size_t len;
  712. struct elevator_type *e;
  713. elevator_name[sizeof(elevator_name) - 1] = '\0';
  714. strncpy(elevator_name, name, sizeof(elevator_name) - 1);
  715. len = strlen(elevator_name);
  716. if (len && elevator_name[len - 1] == '\n')
  717. elevator_name[len - 1] = '\0';
  718. e = elevator_get(elevator_name);
  719. if (!e) {
  720. printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
  721. return -EINVAL;
  722. }
  723. if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
  724. elevator_put(e);
  725. return count;
  726. }
  727. if (!elevator_switch(q, e))
  728. printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
  729. return count;
  730. }
  731. ssize_t elv_iosched_show(request_queue_t *q, char *name)
  732. {
  733. elevator_t *e = q->elevator;
  734. struct elevator_type *elv = e->elevator_type;
  735. struct list_head *entry;
  736. int len = 0;
  737. spin_lock_irq(q->queue_lock);
  738. list_for_each(entry, &elv_list) {
  739. struct elevator_type *__e;
  740. __e = list_entry(entry, struct elevator_type, list);
  741. if (!strcmp(elv->elevator_name, __e->elevator_name))
  742. len += sprintf(name+len, "[%s] ", elv->elevator_name);
  743. else
  744. len += sprintf(name+len, "%s ", __e->elevator_name);
  745. }
  746. spin_unlock_irq(q->queue_lock);
  747. len += sprintf(len+name, "\n");
  748. return len;
  749. }
  750. EXPORT_SYMBOL(elv_dispatch_sort);
  751. EXPORT_SYMBOL(elv_add_request);
  752. EXPORT_SYMBOL(__elv_add_request);
  753. EXPORT_SYMBOL(elv_next_request);
  754. EXPORT_SYMBOL(elv_dequeue_request);
  755. EXPORT_SYMBOL(elv_queue_empty);
  756. EXPORT_SYMBOL(elevator_exit);
  757. EXPORT_SYMBOL(elevator_init);