elevator.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921
  1. /*
  2. * Block device elevator/IO-scheduler.
  3. *
  4. * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
  5. *
  6. * 30042000 Jens Axboe <axboe@suse.de> :
  7. *
  8. * Split the elevator a bit so that it is possible to choose a different
  9. * one or even write a new "plug in". There are three pieces:
  10. * - elevator_fn, inserts a new request in the queue list
  11. * - elevator_merge_fn, decides whether a new buffer can be merged with
  12. * an existing request
  13. * - elevator_dequeue_fn, called when a request is taken off the active list
  14. *
  15. * 20082000 Dave Jones <davej@suse.de> :
  16. * Removed tests for max-bomb-segments, which was breaking elvtune
  17. * when run without -bN
  18. *
  19. * Jens:
  20. * - Rework again to work with bio instead of buffer_heads
  21. * - lose bi_dev comparisons; partition handling is done right now
  22. * - completely modularize elevator setup and teardown
  23. *
  24. */
  25. #include <linux/kernel.h>
  26. #include <linux/fs.h>
  27. #include <linux/blkdev.h>
  28. #include <linux/elevator.h>
  29. #include <linux/bio.h>
  30. #include <linux/config.h>
  31. #include <linux/module.h>
  32. #include <linux/slab.h>
  33. #include <linux/init.h>
  34. #include <linux/compiler.h>
  35. #include <linux/delay.h>
  36. #include <linux/blktrace_api.h>
  37. #include <asm/uaccess.h>
  /*
   * Registry of I/O scheduler types.  elevator_get(), elv_register() and
   * elv_unregister() take elv_list_lock around their accesses to elv_list.
   */
  static LIST_HEAD(elv_list);
  static DEFINE_SPINLOCK(elv_list_lock);
  40. /*
  41. * can we safely merge with this request?
  42. */
  43. inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
  44. {
  45. if (!rq_mergeable(rq))
  46. return 0;
  47. /*
  48. * different data direction or already started, don't merge
  49. */
  50. if (bio_data_dir(bio) != rq_data_dir(rq))
  51. return 0;
  52. /*
  53. * same device and no special stuff set, merge is ok
  54. */
  55. if (rq->rq_disk == bio->bi_bdev->bd_disk &&
  56. !rq->waiting && !rq->special)
  57. return 1;
  58. return 0;
  59. }
  60. EXPORT_SYMBOL(elv_rq_merge_ok);
  61. static inline int elv_try_merge(struct request *__rq, struct bio *bio)
  62. {
  63. int ret = ELEVATOR_NO_MERGE;
  64. /*
  65. * we can merge and sequence is ok, check if it's possible
  66. */
  67. if (elv_rq_merge_ok(__rq, bio)) {
  68. if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
  69. ret = ELEVATOR_BACK_MERGE;
  70. else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
  71. ret = ELEVATOR_FRONT_MERGE;
  72. }
  73. return ret;
  74. }
  75. static struct elevator_type *elevator_find(const char *name)
  76. {
  77. struct elevator_type *e = NULL;
  78. struct list_head *entry;
  79. list_for_each(entry, &elv_list) {
  80. struct elevator_type *__e;
  81. __e = list_entry(entry, struct elevator_type, list);
  82. if (!strcmp(__e->elevator_name, name)) {
  83. e = __e;
  84. break;
  85. }
  86. }
  87. return e;
  88. }
  89. static void elevator_put(struct elevator_type *e)
  90. {
  91. module_put(e->elevator_owner);
  92. }
  93. static struct elevator_type *elevator_get(const char *name)
  94. {
  95. struct elevator_type *e;
  96. spin_lock_irq(&elv_list_lock);
  97. e = elevator_find(name);
  98. if (e && !try_module_get(e->elevator_owner))
  99. e = NULL;
  100. spin_unlock_irq(&elv_list_lock);
  101. return e;
  102. }
  103. static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
  104. {
  105. return eq->ops->elevator_init_fn(q, eq);
  106. }
  107. static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
  108. void *data)
  109. {
  110. q->elevator = eq;
  111. eq->elevator_data = data;
  112. }
  113. static char chosen_elevator[16];
  114. static int __init elevator_setup(char *str)
  115. {
  116. /*
  117. * Be backwards-compatible with previous kernels, so users
  118. * won't get the wrong elevator.
  119. */
  120. if (!strcmp(str, "as"))
  121. strcpy(chosen_elevator, "anticipatory");
  122. else
  123. strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
  124. return 1;
  125. }
  126. __setup("elevator=", elevator_setup);
  127. static struct kobj_type elv_ktype;
  128. static elevator_t *elevator_alloc(struct elevator_type *e)
  129. {
  130. elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
  131. if (eq) {
  132. memset(eq, 0, sizeof(*eq));
  133. eq->ops = &e->ops;
  134. eq->elevator_type = e;
  135. kobject_init(&eq->kobj);
  136. snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
  137. eq->kobj.ktype = &elv_ktype;
  138. mutex_init(&eq->sysfs_lock);
  139. } else {
  140. elevator_put(e);
  141. }
  142. return eq;
  143. }
  144. static void elevator_release(struct kobject *kobj)
  145. {
  146. elevator_t *e = container_of(kobj, elevator_t, kobj);
  147. elevator_put(e->elevator_type);
  148. kfree(e);
  149. }
  150. int elevator_init(request_queue_t *q, char *name)
  151. {
  152. struct elevator_type *e = NULL;
  153. struct elevator_queue *eq;
  154. int ret = 0;
  155. void *data;
  156. INIT_LIST_HEAD(&q->queue_head);
  157. q->last_merge = NULL;
  158. q->end_sector = 0;
  159. q->boundary_rq = NULL;
  160. if (name && !(e = elevator_get(name)))
  161. return -EINVAL;
  162. if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
  163. printk("I/O scheduler %s not found\n", chosen_elevator);
  164. if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
  165. printk("Default I/O scheduler not found, using no-op\n");
  166. e = elevator_get("noop");
  167. }
  168. eq = elevator_alloc(e);
  169. if (!eq)
  170. return -ENOMEM;
  171. data = elevator_init_queue(q, eq);
  172. if (!data) {
  173. kobject_put(&eq->kobj);
  174. return -ENOMEM;
  175. }
  176. elevator_attach(q, eq, data);
  177. return ret;
  178. }
  179. void elevator_exit(elevator_t *e)
  180. {
  181. mutex_lock(&e->sysfs_lock);
  182. if (e->ops->elevator_exit_fn)
  183. e->ops->elevator_exit_fn(e);
  184. e->ops = NULL;
  185. mutex_unlock(&e->sysfs_lock);
  186. kobject_put(&e->kobj);
  187. }
  188. /*
  189. * Insert rq into dispatch queue of q. Queue lock must be held on
  190. * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be
  191. * appended to the dispatch queue. To be used by specific elevators.
  192. */
  193. void elv_dispatch_sort(request_queue_t *q, struct request *rq)
  194. {
  195. sector_t boundary;
  196. struct list_head *entry;
  197. if (q->last_merge == rq)
  198. q->last_merge = NULL;
  199. q->nr_sorted--;
  200. boundary = q->end_sector;
  201. list_for_each_prev(entry, &q->queue_head) {
  202. struct request *pos = list_entry_rq(entry);
  203. if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
  204. break;
  205. if (rq->sector >= boundary) {
  206. if (pos->sector < boundary)
  207. continue;
  208. } else {
  209. if (pos->sector >= boundary)
  210. break;
  211. }
  212. if (rq->sector >= pos->sector)
  213. break;
  214. }
  215. list_add(&rq->queuelist, entry);
  216. }
  217. int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
  218. {
  219. elevator_t *e = q->elevator;
  220. int ret;
  221. if (q->last_merge) {
  222. ret = elv_try_merge(q->last_merge, bio);
  223. if (ret != ELEVATOR_NO_MERGE) {
  224. *req = q->last_merge;
  225. return ret;
  226. }
  227. }
  228. if (e->ops->elevator_merge_fn)
  229. return e->ops->elevator_merge_fn(q, req, bio);
  230. return ELEVATOR_NO_MERGE;
  231. }
  232. void elv_merged_request(request_queue_t *q, struct request *rq)
  233. {
  234. elevator_t *e = q->elevator;
  235. if (e->ops->elevator_merged_fn)
  236. e->ops->elevator_merged_fn(q, rq);
  237. q->last_merge = rq;
  238. }
  239. void elv_merge_requests(request_queue_t *q, struct request *rq,
  240. struct request *next)
  241. {
  242. elevator_t *e = q->elevator;
  243. if (e->ops->elevator_merge_req_fn)
  244. e->ops->elevator_merge_req_fn(q, rq, next);
  245. q->nr_sorted--;
  246. q->last_merge = rq;
  247. }
  248. void elv_requeue_request(request_queue_t *q, struct request *rq)
  249. {
  250. elevator_t *e = q->elevator;
  251. /*
  252. * it already went through dequeue, we need to decrement the
  253. * in_flight count again
  254. */
  255. if (blk_account_rq(rq)) {
  256. q->in_flight--;
  257. if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
  258. e->ops->elevator_deactivate_req_fn(q, rq);
  259. }
  260. rq->flags &= ~REQ_STARTED;
  261. elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
  262. }
  263. static void elv_drain_elevator(request_queue_t *q)
  264. {
  265. static int printed;
  266. while (q->elevator->ops->elevator_dispatch_fn(q, 1))
  267. ;
  268. if (q->nr_sorted == 0)
  269. return;
  270. if (printed++ < 10) {
  271. printk(KERN_ERR "%s: forced dispatching is broken "
  272. "(nr_sorted=%u), please report this\n",
  273. q->elevator->elevator_type->elevator_name, q->nr_sorted);
  274. }
  275. }
  276. void elv_insert(request_queue_t *q, struct request *rq, int where)
  277. {
  278. struct list_head *pos;
  279. unsigned ordseq;
  280. int unplug_it = 1;
  281. blk_add_trace_rq(q, rq, BLK_TA_INSERT);
  282. rq->q = q;
  283. switch (where) {
  284. case ELEVATOR_INSERT_FRONT:
  285. rq->flags |= REQ_SOFTBARRIER;
  286. list_add(&rq->queuelist, &q->queue_head);
  287. break;
  288. case ELEVATOR_INSERT_BACK:
  289. rq->flags |= REQ_SOFTBARRIER;
  290. elv_drain_elevator(q);
  291. list_add_tail(&rq->queuelist, &q->queue_head);
  292. /*
  293. * We kick the queue here for the following reasons.
  294. * - The elevator might have returned NULL previously
  295. * to delay requests and returned them now. As the
  296. * queue wasn't empty before this request, ll_rw_blk
  297. * won't run the queue on return, resulting in hang.
  298. * - Usually, back inserted requests won't be merged
  299. * with anything. There's no point in delaying queue
  300. * processing.
  301. */
  302. blk_remove_plug(q);
  303. q->request_fn(q);
  304. break;
  305. case ELEVATOR_INSERT_SORT:
  306. BUG_ON(!blk_fs_request(rq));
  307. rq->flags |= REQ_SORTED;
  308. q->nr_sorted++;
  309. if (q->last_merge == NULL && rq_mergeable(rq))
  310. q->last_merge = rq;
  311. /*
  312. * Some ioscheds (cfq) run q->request_fn directly, so
  313. * rq cannot be accessed after calling
  314. * elevator_add_req_fn.
  315. */
  316. q->elevator->ops->elevator_add_req_fn(q, rq);
  317. break;
  318. case ELEVATOR_INSERT_REQUEUE:
  319. /*
  320. * If ordered flush isn't in progress, we do front
  321. * insertion; otherwise, requests should be requeued
  322. * in ordseq order.
  323. */
  324. rq->flags |= REQ_SOFTBARRIER;
  325. if (q->ordseq == 0) {
  326. list_add(&rq->queuelist, &q->queue_head);
  327. break;
  328. }
  329. ordseq = blk_ordered_req_seq(rq);
  330. list_for_each(pos, &q->queue_head) {
  331. struct request *pos_rq = list_entry_rq(pos);
  332. if (ordseq <= blk_ordered_req_seq(pos_rq))
  333. break;
  334. }
  335. list_add_tail(&rq->queuelist, pos);
  336. /*
  337. * most requeues happen because of a busy condition, don't
  338. * force unplug of the queue for that case.
  339. */
  340. unplug_it = 0;
  341. break;
  342. default:
  343. printk(KERN_ERR "%s: bad insertion point %d\n",
  344. __FUNCTION__, where);
  345. BUG();
  346. }
  347. if (unplug_it && blk_queue_plugged(q)) {
  348. int nrq = q->rq.count[READ] + q->rq.count[WRITE]
  349. - q->in_flight;
  350. if (nrq >= q->unplug_thresh)
  351. __generic_unplug_device(q);
  352. }
  353. }
  354. void __elv_add_request(request_queue_t *q, struct request *rq, int where,
  355. int plug)
  356. {
  357. if (q->ordcolor)
  358. rq->flags |= REQ_ORDERED_COLOR;
  359. if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
  360. /*
  361. * toggle ordered color
  362. */
  363. if (blk_barrier_rq(rq))
  364. q->ordcolor ^= 1;
  365. /*
  366. * barriers implicitly indicate back insertion
  367. */
  368. if (where == ELEVATOR_INSERT_SORT)
  369. where = ELEVATOR_INSERT_BACK;
  370. /*
  371. * this request is scheduling boundary, update
  372. * end_sector
  373. */
  374. if (blk_fs_request(rq)) {
  375. q->end_sector = rq_end_sector(rq);
  376. q->boundary_rq = rq;
  377. }
  378. } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
  379. where = ELEVATOR_INSERT_BACK;
  380. if (plug)
  381. blk_plug_device(q);
  382. elv_insert(q, rq, where);
  383. }
  384. void elv_add_request(request_queue_t *q, struct request *rq, int where,
  385. int plug)
  386. {
  387. unsigned long flags;
  388. spin_lock_irqsave(q->queue_lock, flags);
  389. __elv_add_request(q, rq, where, plug);
  390. spin_unlock_irqrestore(q->queue_lock, flags);
  391. }
  392. static inline struct request *__elv_next_request(request_queue_t *q)
  393. {
  394. struct request *rq;
  395. while (1) {
  396. while (!list_empty(&q->queue_head)) {
  397. rq = list_entry_rq(q->queue_head.next);
  398. if (blk_do_ordered(q, &rq))
  399. return rq;
  400. }
  401. if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
  402. return NULL;
  403. }
  404. }
  405. struct request *elv_next_request(request_queue_t *q)
  406. {
  407. struct request *rq;
  408. int ret;
  409. while ((rq = __elv_next_request(q)) != NULL) {
  410. if (!(rq->flags & REQ_STARTED)) {
  411. elevator_t *e = q->elevator;
  412. /*
  413. * This is the first time the device driver
  414. * sees this request (possibly after
  415. * requeueing). Notify IO scheduler.
  416. */
  417. if (blk_sorted_rq(rq) &&
  418. e->ops->elevator_activate_req_fn)
  419. e->ops->elevator_activate_req_fn(q, rq);
  420. /*
  421. * just mark as started even if we don't start
  422. * it, a request that has been delayed should
  423. * not be passed by new incoming requests
  424. */
  425. rq->flags |= REQ_STARTED;
  426. blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
  427. }
  428. if (!q->boundary_rq || q->boundary_rq == rq) {
  429. q->end_sector = rq_end_sector(rq);
  430. q->boundary_rq = NULL;
  431. }
  432. if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
  433. break;
  434. ret = q->prep_rq_fn(q, rq);
  435. if (ret == BLKPREP_OK) {
  436. break;
  437. } else if (ret == BLKPREP_DEFER) {
  438. /*
  439. * the request may have been (partially) prepped.
  440. * we need to keep this request in the front to
  441. * avoid resource deadlock. REQ_STARTED will
  442. * prevent other fs requests from passing this one.
  443. */
  444. rq = NULL;
  445. break;
  446. } else if (ret == BLKPREP_KILL) {
  447. int nr_bytes = rq->hard_nr_sectors << 9;
  448. if (!nr_bytes)
  449. nr_bytes = rq->data_len;
  450. blkdev_dequeue_request(rq);
  451. rq->flags |= REQ_QUIET;
  452. end_that_request_chunk(rq, 0, nr_bytes);
  453. end_that_request_last(rq, 0);
  454. } else {
  455. printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
  456. ret);
  457. break;
  458. }
  459. }
  460. return rq;
  461. }
  462. void elv_dequeue_request(request_queue_t *q, struct request *rq)
  463. {
  464. BUG_ON(list_empty(&rq->queuelist));
  465. list_del_init(&rq->queuelist);
  466. /*
  467. * the time frame between a request being removed from the lists
  468. * and to it is freed is accounted as io that is in progress at
  469. * the driver side.
  470. */
  471. if (blk_account_rq(rq))
  472. q->in_flight++;
  473. }
  474. int elv_queue_empty(request_queue_t *q)
  475. {
  476. elevator_t *e = q->elevator;
  477. if (!list_empty(&q->queue_head))
  478. return 0;
  479. if (e->ops->elevator_queue_empty_fn)
  480. return e->ops->elevator_queue_empty_fn(q);
  481. return 1;
  482. }
  483. struct request *elv_latter_request(request_queue_t *q, struct request *rq)
  484. {
  485. elevator_t *e = q->elevator;
  486. if (e->ops->elevator_latter_req_fn)
  487. return e->ops->elevator_latter_req_fn(q, rq);
  488. return NULL;
  489. }
  490. struct request *elv_former_request(request_queue_t *q, struct request *rq)
  491. {
  492. elevator_t *e = q->elevator;
  493. if (e->ops->elevator_former_req_fn)
  494. return e->ops->elevator_former_req_fn(q, rq);
  495. return NULL;
  496. }
  497. int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
  498. gfp_t gfp_mask)
  499. {
  500. elevator_t *e = q->elevator;
  501. if (e->ops->elevator_set_req_fn)
  502. return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
  503. rq->elevator_private = NULL;
  504. return 0;
  505. }
  506. void elv_put_request(request_queue_t *q, struct request *rq)
  507. {
  508. elevator_t *e = q->elevator;
  509. if (e->ops->elevator_put_req_fn)
  510. e->ops->elevator_put_req_fn(q, rq);
  511. }
  512. int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
  513. {
  514. elevator_t *e = q->elevator;
  515. if (e->ops->elevator_may_queue_fn)
  516. return e->ops->elevator_may_queue_fn(q, rw, bio);
  517. return ELV_MQUEUE_MAY;
  518. }
  519. void elv_completed_request(request_queue_t *q, struct request *rq)
  520. {
  521. elevator_t *e = q->elevator;
  522. /*
  523. * request is released from the driver, io must be done
  524. */
  525. if (blk_account_rq(rq)) {
  526. q->in_flight--;
  527. if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
  528. e->ops->elevator_completed_req_fn(q, rq);
  529. }
  530. /*
  531. * Check if the queue is waiting for fs requests to be
  532. * drained for flush sequence.
  533. */
  534. if (unlikely(q->ordseq)) {
  535. struct request *first_rq = list_entry_rq(q->queue_head.next);
  536. if (q->in_flight == 0 &&
  537. blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
  538. blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
  539. blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
  540. q->request_fn(q);
  541. }
  542. }
  543. }
  544. #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
  545. static ssize_t
  546. elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
  547. {
  548. elevator_t *e = container_of(kobj, elevator_t, kobj);
  549. struct elv_fs_entry *entry = to_elv(attr);
  550. ssize_t error;
  551. if (!entry->show)
  552. return -EIO;
  553. mutex_lock(&e->sysfs_lock);
  554. error = e->ops ? entry->show(e, page) : -ENOENT;
  555. mutex_unlock(&e->sysfs_lock);
  556. return error;
  557. }
  558. static ssize_t
  559. elv_attr_store(struct kobject *kobj, struct attribute *attr,
  560. const char *page, size_t length)
  561. {
  562. elevator_t *e = container_of(kobj, elevator_t, kobj);
  563. struct elv_fs_entry *entry = to_elv(attr);
  564. ssize_t error;
  565. if (!entry->store)
  566. return -EIO;
  567. mutex_lock(&e->sysfs_lock);
  568. error = e->ops ? entry->store(e, page, length) : -ENOENT;
  569. mutex_unlock(&e->sysfs_lock);
  570. return error;
  571. }
  572. static struct sysfs_ops elv_sysfs_ops = {
  573. .show = elv_attr_show,
  574. .store = elv_attr_store,
  575. };
  576. static struct kobj_type elv_ktype = {
  577. .sysfs_ops = &elv_sysfs_ops,
  578. .release = elevator_release,
  579. };
  580. int elv_register_queue(struct request_queue *q)
  581. {
  582. elevator_t *e = q->elevator;
  583. int error;
  584. e->kobj.parent = &q->kobj;
  585. error = kobject_add(&e->kobj);
  586. if (!error) {
  587. struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
  588. if (attr) {
  589. while (attr->attr.name) {
  590. if (sysfs_create_file(&e->kobj, &attr->attr))
  591. break;
  592. attr++;
  593. }
  594. }
  595. kobject_uevent(&e->kobj, KOBJ_ADD);
  596. }
  597. return error;
  598. }
  599. static void __elv_unregister_queue(elevator_t *e)
  600. {
  601. kobject_uevent(&e->kobj, KOBJ_REMOVE);
  602. kobject_del(&e->kobj);
  603. }
  604. void elv_unregister_queue(struct request_queue *q)
  605. {
  606. if (q)
  607. __elv_unregister_queue(q->elevator);
  608. }
  609. int elv_register(struct elevator_type *e)
  610. {
  611. spin_lock_irq(&elv_list_lock);
  612. BUG_ON(elevator_find(e->elevator_name));
  613. list_add_tail(&e->list, &elv_list);
  614. spin_unlock_irq(&elv_list_lock);
  615. printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
  616. if (!strcmp(e->elevator_name, chosen_elevator) ||
  617. (!*chosen_elevator &&
  618. !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
  619. printk(" (default)");
  620. printk("\n");
  621. return 0;
  622. }
  623. EXPORT_SYMBOL_GPL(elv_register);
  624. void elv_unregister(struct elevator_type *e)
  625. {
  626. struct task_struct *g, *p;
  627. /*
  628. * Iterate every thread in the process to remove the io contexts.
  629. */
  630. if (e->ops.trim) {
  631. read_lock(&tasklist_lock);
  632. do_each_thread(g, p) {
  633. task_lock(p);
  634. e->ops.trim(p->io_context);
  635. task_unlock(p);
  636. } while_each_thread(g, p);
  637. read_unlock(&tasklist_lock);
  638. }
  639. spin_lock_irq(&elv_list_lock);
  640. list_del_init(&e->list);
  641. spin_unlock_irq(&elv_list_lock);
  642. }
  643. EXPORT_SYMBOL_GPL(elv_unregister);
  644. /*
  645. * switch to new_e io scheduler. be careful not to introduce deadlocks -
  646. * we don't free the old io scheduler, before we have allocated what we
  647. * need for the new one. this way we have a chance of going back to the old
  648. * one, if the new one fails init for some reason.
  649. */
  650. static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
  651. {
  652. elevator_t *old_elevator, *e;
  653. void *data;
  654. /*
  655. * Allocate new elevator
  656. */
  657. e = elevator_alloc(new_e);
  658. if (!e)
  659. return 0;
  660. data = elevator_init_queue(q, e);
  661. if (!data) {
  662. kobject_put(&e->kobj);
  663. return 0;
  664. }
  665. /*
  666. * Turn on BYPASS and drain all requests w/ elevator private data
  667. */
  668. spin_lock_irq(q->queue_lock);
  669. set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
  670. elv_drain_elevator(q);
  671. while (q->rq.elvpriv) {
  672. blk_remove_plug(q);
  673. q->request_fn(q);
  674. spin_unlock_irq(q->queue_lock);
  675. msleep(10);
  676. spin_lock_irq(q->queue_lock);
  677. elv_drain_elevator(q);
  678. }
  679. /*
  680. * Remember old elevator.
  681. */
  682. old_elevator = q->elevator;
  683. /*
  684. * attach and start new elevator
  685. */
  686. elevator_attach(q, e, data);
  687. spin_unlock_irq(q->queue_lock);
  688. __elv_unregister_queue(old_elevator);
  689. if (elv_register_queue(q))
  690. goto fail_register;
  691. /*
  692. * finally exit old elevator and turn off BYPASS.
  693. */
  694. elevator_exit(old_elevator);
  695. clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
  696. return 1;
  697. fail_register:
  698. /*
  699. * switch failed, exit the new io scheduler and reattach the old
  700. * one again (along with re-adding the sysfs dir)
  701. */
  702. elevator_exit(e);
  703. e = NULL;
  704. q->elevator = old_elevator;
  705. elv_register_queue(q);
  706. clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
  707. if (e)
  708. kobject_put(&e->kobj);
  709. return 0;
  710. }
  711. ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
  712. {
  713. char elevator_name[ELV_NAME_MAX];
  714. size_t len;
  715. struct elevator_type *e;
  716. elevator_name[sizeof(elevator_name) - 1] = '\0';
  717. strncpy(elevator_name, name, sizeof(elevator_name) - 1);
  718. len = strlen(elevator_name);
  719. if (len && elevator_name[len - 1] == '\n')
  720. elevator_name[len - 1] = '\0';
  721. e = elevator_get(elevator_name);
  722. if (!e) {
  723. printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
  724. return -EINVAL;
  725. }
  726. if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
  727. elevator_put(e);
  728. return count;
  729. }
  730. if (!elevator_switch(q, e))
  731. printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
  732. return count;
  733. }
  734. ssize_t elv_iosched_show(request_queue_t *q, char *name)
  735. {
  736. elevator_t *e = q->elevator;
  737. struct elevator_type *elv = e->elevator_type;
  738. struct list_head *entry;
  739. int len = 0;
  740. spin_lock_irq(q->queue_lock);
  741. list_for_each(entry, &elv_list) {
  742. struct elevator_type *__e;
  743. __e = list_entry(entry, struct elevator_type, list);
  744. if (!strcmp(elv->elevator_name, __e->elevator_name))
  745. len += sprintf(name+len, "[%s] ", elv->elevator_name);
  746. else
  747. len += sprintf(name+len, "%s ", __e->elevator_name);
  748. }
  749. spin_unlock_irq(q->queue_lock);
  750. len += sprintf(len+name, "\n");
  751. return len;
  752. }
  /* Entry points of this file that are exported to the rest of the kernel. */
  EXPORT_SYMBOL(elv_dispatch_sort);
  EXPORT_SYMBOL(elv_add_request);
  EXPORT_SYMBOL(__elv_add_request);
  EXPORT_SYMBOL(elv_next_request);
  EXPORT_SYMBOL(elv_dequeue_request);
  EXPORT_SYMBOL(elv_queue_empty);
  EXPORT_SYMBOL(elevator_exit);
  EXPORT_SYMBOL(elevator_init);