/*
 * Block device elevator/IO-scheduler.
 *
 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 *
 * 30042000 Jens Axboe <axboe@suse.de> :
 *
 * Split the elevator a bit so that it is possible to choose a different
 * one or even write a new "plug in". There are three pieces:
 * - elevator_fn, inserts a new request in the queue list
 * - elevator_merge_fn, decides whether a new buffer can be merged with
 *   an existing request
 * - elevator_dequeue_fn, called when a request is taken off the active list
 *
 * 20082000 Dave Jones <davej@suse.de> :
 * Removed tests for max-bomb-segments, which was breaking elvtune
 * when run without -bN
 *
 * Jens:
 * - Rework again to work with bio instead of buffer_heads
 * - lose bi_dev comparisons, partition handling is correct now
 * - completely modularize elevator setup and teardown
 *
 */
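
/*
 * A minimal sketch (illustrative only, not part of this file) of how an
 * I/O scheduler plugs in: fill in an elevator_type with the hooks it
 * implements and register it.  The hook and field names below match the
 * ops used in this file; the "sketch_*" identifiers are made up.
 *
 *      static struct elevator_type elevator_sketch = {
 *              .ops = {
 *                      .elevator_merge_fn    = sketch_merge,
 *                      .elevator_dispatch_fn = sketch_dispatch,
 *                      .elevator_add_req_fn  = sketch_add_request,
 *              },
 *              .elevator_name  = "sketch",
 *              .elevator_owner = THIS_MODULE,
 *      };
 *
 *      static int __init sketch_init(void)
 *      {
 *              return elv_register(&elevator_sketch);
 *      }
 *      module_init(sketch_init);
 */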

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>

#include <asm/uaccess.h>

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * can we safely merge with this request?
 */
inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
{
        if (!rq_mergeable(rq))
                return 0;

        /*
         * different data direction or already started, don't merge
         */
        if (bio_data_dir(bio) != rq_data_dir(rq))
                return 0;

        /*
         * same device and no special stuff set, merge is ok
         */
        if (rq->rq_disk == bio->bi_bdev->bd_disk &&
            !rq->waiting && !rq->special)
                return 1;

        return 0;
}
EXPORT_SYMBOL(elv_rq_merge_ok);
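
/*
 * Classify a candidate merge of bio into __rq: a back merge if the bio
 * starts exactly where __rq ends, a front merge if it ends exactly where
 * __rq starts, otherwise no merge.
 */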
static inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
        int ret = ELEVATOR_NO_MERGE;

        /*
         * we can merge and sequence is ok, check if it's possible
         */
        if (elv_rq_merge_ok(__rq, bio)) {
                if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
                        ret = ELEVATOR_BACK_MERGE;
                else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
                        ret = ELEVATOR_FRONT_MERGE;
        }

        return ret;
}
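
/*
 * Look up a registered elevator type by name.  Caller must hold
 * elv_list_lock.
 */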
static struct elevator_type *elevator_find(const char *name)
{
        struct elevator_type *e = NULL;
        struct list_head *entry;

        list_for_each(entry, &elv_list) {
                struct elevator_type *__e;

                __e = list_entry(entry, struct elevator_type, list);

                if (!strcmp(__e->elevator_name, name)) {
                        e = __e;
                        break;
                }
        }

        return e;
}
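
/*
 * elevator_get()/elevator_put() pin and release the module that provides
 * the scheduler, so it cannot be unloaded while a queue is using it.
 */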
static void elevator_put(struct elevator_type *e)
{
        module_put(e->elevator_owner);
}

static struct elevator_type *elevator_get(const char *name)
{
        struct elevator_type *e;

        spin_lock_irq(&elv_list_lock);

        e = elevator_find(name);
        if (e && !try_module_get(e->elevator_owner))
                e = NULL;

        spin_unlock_irq(&elv_list_lock);

        return e;
}
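
/*
 * Attach an allocated elevator_queue to the request queue and run the
 * scheduler's init hook, if it has one.
 */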
static int elevator_attach(request_queue_t *q, struct elevator_queue *eq)
{
        int ret = 0;

        q->elevator = eq;

        if (eq->ops->elevator_init_fn)
                ret = eq->ops->elevator_init_fn(q, eq);

        return ret;
}
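
/*
 * Default scheduler chosen on the kernel command line, e.g. booting with
 * "elevator=deadline".  Empty means fall back to CONFIG_DEFAULT_IOSCHED.
 */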
static char chosen_elevator[16];

static int __init elevator_setup(char *str)
{
        /*
         * Be backwards-compatible with previous kernels, so users
         * won't get the wrong elevator.
         */
        if (!strcmp(str, "as"))
                strcpy(chosen_elevator, "anticipatory");
        else
                strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
        return 1;
}

__setup("elevator=", elevator_setup);

static struct kobj_type elv_ktype;
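
/*
 * Allocate and initialize an elevator_queue for scheduler type e.  On
 * failure, the module reference taken by elevator_get() is dropped.
 */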
static elevator_t *elevator_alloc(struct elevator_type *e)
{
        elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
        if (eq) {
                memset(eq, 0, sizeof(*eq));
                eq->ops = &e->ops;
                eq->elevator_type = e;
                kobject_init(&eq->kobj);
                snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
                eq->kobj.ktype = &elv_ktype;
                mutex_init(&eq->sysfs_lock);
        } else {
                elevator_put(e);
        }
        return eq;
}

static void elevator_release(struct kobject *kobj)
{
        elevator_t *e = container_of(kobj, elevator_t, kobj);
        elevator_put(e->elevator_type);
        kfree(e);
}
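
/*
 * Set up the I/O scheduler for a new queue.  Selection order: an explicit
 * name passed by the driver, then the "elevator=" boot parameter, then
 * CONFIG_DEFAULT_IOSCHED, and finally "noop" as a last resort.
 */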
int elevator_init(request_queue_t *q, char *name)
{
        struct elevator_type *e = NULL;
        struct elevator_queue *eq;
        int ret = 0;

        INIT_LIST_HEAD(&q->queue_head);
        q->last_merge = NULL;
        q->end_sector = 0;
        q->boundary_rq = NULL;

        if (name && !(e = elevator_get(name)))
                return -EINVAL;

        if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
                printk("I/O scheduler %s not found\n", chosen_elevator);

        if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
                printk("Default I/O scheduler not found, using no-op\n");
                e = elevator_get("noop");
        }

        eq = elevator_alloc(e);
        if (!eq)
                return -ENOMEM;

        ret = elevator_attach(q, eq);
        if (ret)
                kobject_put(&eq->kobj);

        return ret;
}

void elevator_exit(elevator_t *e)
{
        mutex_lock(&e->sysfs_lock);
        if (e->ops->elevator_exit_fn)
                e->ops->elevator_exit_fn(e);
        e->ops = NULL;
        mutex_unlock(&e->sysfs_lock);

        kobject_put(&e->kobj);
}

/*
 * Insert rq into the dispatch queue of q.  Queue lock must be held on
 * entry.  rq is sort-inserted into the dispatch queue.  To be used by
 * specific elevators.
 */
void elv_dispatch_sort(request_queue_t *q, struct request *rq)
{
        sector_t boundary;
        struct list_head *entry;

        if (q->last_merge == rq)
                q->last_merge = NULL;

        q->nr_sorted--;

        boundary = q->end_sector;

        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry_rq(entry);

                if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
                        break;
                if (rq->sector >= boundary) {
                        if (pos->sector < boundary)
                                continue;
                } else {
                        if (pos->sector >= boundary)
                                break;
                }
                if (rq->sector >= pos->sector)
                        break;
        }

        list_add(&rq->queuelist, entry);
}
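
/*
 * Find a request that bio can be merged into.  The cached q->last_merge
 * is tried first; otherwise the decision is delegated to the scheduler's
 * merge hook.  Returns an ELEVATOR_*_MERGE code and sets *req on success.
 */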
int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
        elevator_t *e = q->elevator;
        int ret;

        if (q->last_merge) {
                ret = elv_try_merge(q->last_merge, bio);
                if (ret != ELEVATOR_NO_MERGE) {
                        *req = q->last_merge;
                        return ret;
                }
        }

        if (e->ops->elevator_merge_fn)
                return e->ops->elevator_merge_fn(q, req, bio);

        return ELEVATOR_NO_MERGE;
}

void elv_merged_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_merged_fn)
                e->ops->elevator_merged_fn(q, rq);

        q->last_merge = rq;
}

void elv_merge_requests(request_queue_t *q, struct request *rq,
                        struct request *next)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_merge_req_fn)
                e->ops->elevator_merge_req_fn(q, rq, next);
        q->nr_sorted--;

        q->last_merge = rq;
}

void elv_requeue_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        /*
         * it already went through dequeue, we need to decrement the
         * in_flight count again
         */
        if (blk_account_rq(rq)) {
                q->in_flight--;
                if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
                        e->ops->elevator_deactivate_req_fn(q, rq);
        }

        rq->flags &= ~REQ_STARTED;

        elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}

static void elv_drain_elevator(request_queue_t *q)
{
        static int printed;
        while (q->elevator->ops->elevator_dispatch_fn(q, 1))
                ;
        if (q->nr_sorted == 0)
                return;
        if (printed++ < 10) {
                printk(KERN_ERR "%s: forced dispatching is broken "
                       "(nr_sorted=%u), please report this\n",
                       q->elevator->elevator_type->elevator_name, q->nr_sorted);
        }
}
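
/*
 * Core insertion routine.  "where" selects the policy: front or back of
 * the dispatch queue, sorted insertion via the scheduler, or requeue
 * (front insertion unless an ordered flush is in progress).  Queue lock
 * must be held.
 */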
void elv_insert(request_queue_t *q, struct request *rq, int where)
{
        struct list_head *pos;
        unsigned ordseq;
        int unplug_it = 1;

        blk_add_trace_rq(q, rq, BLK_TA_INSERT);

        rq->q = q;

        switch (where) {
        case ELEVATOR_INSERT_FRONT:
                rq->flags |= REQ_SOFTBARRIER;

                list_add(&rq->queuelist, &q->queue_head);
                break;

        case ELEVATOR_INSERT_BACK:
                rq->flags |= REQ_SOFTBARRIER;
                elv_drain_elevator(q);
                list_add_tail(&rq->queuelist, &q->queue_head);
                /*
                 * We kick the queue here for the following reasons.
                 * - The elevator might have returned NULL previously
                 *   to delay requests and returned them now.  As the
                 *   queue wasn't empty before this request, ll_rw_blk
                 *   won't run the queue on return, resulting in hang.
                 * - Usually, back inserted requests won't be merged
                 *   with anything.  There's no point in delaying queue
                 *   processing.
                 */
                blk_remove_plug(q);
                q->request_fn(q);
                break;

        case ELEVATOR_INSERT_SORT:
                BUG_ON(!blk_fs_request(rq));
                rq->flags |= REQ_SORTED;
                q->nr_sorted++;
                if (q->last_merge == NULL && rq_mergeable(rq))
                        q->last_merge = rq;
                /*
                 * Some ioscheds (cfq) run q->request_fn directly, so
                 * rq cannot be accessed after calling
                 * elevator_add_req_fn.
                 */
                q->elevator->ops->elevator_add_req_fn(q, rq);
                break;

        case ELEVATOR_INSERT_REQUEUE:
                /*
                 * If ordered flush isn't in progress, we do front
                 * insertion; otherwise, requests should be requeued
                 * in ordseq order.
                 */
                rq->flags |= REQ_SOFTBARRIER;

                if (q->ordseq == 0) {
                        list_add(&rq->queuelist, &q->queue_head);
                        break;
                }

                ordseq = blk_ordered_req_seq(rq);

                list_for_each(pos, &q->queue_head) {
                        struct request *pos_rq = list_entry_rq(pos);
                        if (ordseq <= blk_ordered_req_seq(pos_rq))
                                break;
                }

                list_add_tail(&rq->queuelist, pos);
                /*
                 * most requeues happen because of a busy condition, don't
                 * force unplug of the queue for that case.
                 */
                unplug_it = 0;
                break;

        default:
                printk(KERN_ERR "%s: bad insertion point %d\n",
                       __FUNCTION__, where);
                BUG();
        }

        if (unplug_it && blk_queue_plugged(q)) {
                int nrq = q->rq.count[READ] + q->rq.count[WRITE]
                        - q->in_flight;

                if (nrq >= q->unplug_thresh)
                        __generic_unplug_device(q);
        }
}
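
/*
 * Add rq to the queue, adjusting the insertion point for barriers and for
 * requests without elevator-private data.  Queue lock must be held;
 * elv_add_request() below is the locking wrapper.
 */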
void __elv_add_request(request_queue_t *q, struct request *rq, int where,
                       int plug)
{
        if (q->ordcolor)
                rq->flags |= REQ_ORDERED_COLOR;

        if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
                /*
                 * toggle ordered color
                 */
                if (blk_barrier_rq(rq))
                        q->ordcolor ^= 1;

                /*
                 * barriers implicitly indicate back insertion
                 */
                if (where == ELEVATOR_INSERT_SORT)
                        where = ELEVATOR_INSERT_BACK;

                /*
                 * this request is scheduling boundary, update
                 * end_sector
                 */
                if (blk_fs_request(rq)) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = rq;
                }
        } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
                where = ELEVATOR_INSERT_BACK;

        if (plug)
                blk_plug_device(q);

        elv_insert(q, rq, where);
}

void elv_add_request(request_queue_t *q, struct request *rq, int where,
                     int plug)
{
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        __elv_add_request(q, rq, where, plug);
        spin_unlock_irqrestore(q->queue_lock, flags);
}

static inline struct request *__elv_next_request(request_queue_t *q)
{
        struct request *rq;

        while (1) {
                while (!list_empty(&q->queue_head)) {
                        rq = list_entry_rq(q->queue_head.next);
                        if (blk_do_ordered(q, &rq))
                                return rq;
                }

                if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
                        return NULL;
        }
}
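
/*
 * Hand the next request to the device driver.  The request is marked
 * REQ_STARTED so delayed requests are not passed by newer ones, and the
 * driver's prep_rq_fn may accept, defer, or kill it before it is returned.
 */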
struct request *elv_next_request(request_queue_t *q)
{
        struct request *rq;
        int ret;

        while ((rq = __elv_next_request(q)) != NULL) {
                if (!(rq->flags & REQ_STARTED)) {
                        elevator_t *e = q->elevator;

                        /*
                         * This is the first time the device driver
                         * sees this request (possibly after
                         * requeueing).  Notify IO scheduler.
                         */
                        if (blk_sorted_rq(rq) &&
                            e->ops->elevator_activate_req_fn)
                                e->ops->elevator_activate_req_fn(q, rq);

                        /*
                         * just mark as started even if we don't start
                         * it, a request that has been delayed should
                         * not be passed by new incoming requests
                         */
                        rq->flags |= REQ_STARTED;
                        blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
                }

                if (!q->boundary_rq || q->boundary_rq == rq) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = NULL;
                }

                if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
                        break;

                ret = q->prep_rq_fn(q, rq);
                if (ret == BLKPREP_OK) {
                        break;
                } else if (ret == BLKPREP_DEFER) {
                        /*
                         * the request may have been (partially) prepped.
                         * we need to keep this request in the front to
                         * avoid resource deadlock.  REQ_STARTED will
                         * prevent other fs requests from passing this one.
                         */
                        rq = NULL;
                        break;
                } else if (ret == BLKPREP_KILL) {
                        int nr_bytes = rq->hard_nr_sectors << 9;

                        if (!nr_bytes)
                                nr_bytes = rq->data_len;

                        blkdev_dequeue_request(rq);
                        rq->flags |= REQ_QUIET;
                        end_that_request_chunk(rq, 0, nr_bytes);
                        end_that_request_last(rq, 0);
                } else {
                        printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
                               ret);
                        break;
                }
        }

        return rq;
}

void elv_dequeue_request(request_queue_t *q, struct request *rq)
{
        BUG_ON(list_empty(&rq->queuelist));

        list_del_init(&rq->queuelist);

        /*
         * the time frame between a request being removed from the lists
         * and it being freed is accounted as io that is in progress at
         * the driver side.
         */
        if (blk_account_rq(rq))
                q->in_flight++;
}

int elv_queue_empty(request_queue_t *q)
{
        elevator_t *e = q->elevator;

        if (!list_empty(&q->queue_head))
                return 0;

        if (e->ops->elevator_queue_empty_fn)
                return e->ops->elevator_queue_empty_fn(q);

        return 1;
}

struct request *elv_latter_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_latter_req_fn)
                return e->ops->elevator_latter_req_fn(q, rq);
        return NULL;
}

struct request *elv_former_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_former_req_fn)
                return e->ops->elevator_former_req_fn(q, rq);
        return NULL;
}

int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
                    gfp_t gfp_mask)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_set_req_fn)
                return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);

        rq->elevator_private = NULL;
        return 0;
}

void elv_put_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_put_req_fn)
                e->ops->elevator_put_req_fn(q, rq);
}

int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
{
        elevator_t *e = q->elevator;

        if (e->ops->elevator_may_queue_fn)
                return e->ops->elevator_may_queue_fn(q, rw, bio);

        return ELV_MQUEUE_MAY;
}

void elv_completed_request(request_queue_t *q, struct request *rq)
{
        elevator_t *e = q->elevator;

        /*
         * request is released from the driver, io must be done
         */
        if (blk_account_rq(rq)) {
                q->in_flight--;
                if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
                        e->ops->elevator_completed_req_fn(q, rq);
        }

        /*
         * Check if the queue is waiting for fs requests to be
         * drained for flush sequence.
         */
        if (unlikely(q->ordseq)) {
                struct request *first_rq = list_entry_rq(q->queue_head.next);
                if (q->in_flight == 0 &&
                    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
                    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
                        blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
                        q->request_fn(q);
                }
        }
}

#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
        elevator_t *e = container_of(kobj, elevator_t, kobj);
        struct elv_fs_entry *entry = to_elv(attr);
        ssize_t error;

        if (!entry->show)
                return -EIO;

        mutex_lock(&e->sysfs_lock);
        error = e->ops ? entry->show(e, page) : -ENOENT;
        mutex_unlock(&e->sysfs_lock);
        return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
               const char *page, size_t length)
{
        elevator_t *e = container_of(kobj, elevator_t, kobj);
        struct elv_fs_entry *entry = to_elv(attr);
        ssize_t error;

        if (!entry->store)
                return -EIO;

        mutex_lock(&e->sysfs_lock);
        error = e->ops ? entry->store(e, page, length) : -ENOENT;
        mutex_unlock(&e->sysfs_lock);
        return error;
}

static struct sysfs_ops elv_sysfs_ops = {
        .show   = elv_attr_show,
        .store  = elv_attr_store,
};

static struct kobj_type elv_ktype = {
        .sysfs_ops      = &elv_sysfs_ops,
        .release        = elevator_release,
};
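
/*
 * Create (or remove) the per-queue "iosched" kobject under the queue's
 * sysfs directory and populate it with the scheduler's attributes.
 */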
int elv_register_queue(struct request_queue *q)
{
        elevator_t *e = q->elevator;
        int error;

        e->kobj.parent = &q->kobj;

        error = kobject_add(&e->kobj);
        if (!error) {
                struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
                if (attr) {
                        while (attr->attr.name) {
                                if (sysfs_create_file(&e->kobj, &attr->attr))
                                        break;
                                attr++;
                        }
                }
                kobject_uevent(&e->kobj, KOBJ_ADD);
        }
        return error;
}

void elv_unregister_queue(struct request_queue *q)
{
        if (q) {
                elevator_t *e = q->elevator;
                kobject_uevent(&e->kobj, KOBJ_REMOVE);
                kobject_del(&e->kobj);
        }
}

int elv_register(struct elevator_type *e)
{
        spin_lock_irq(&elv_list_lock);
        BUG_ON(elevator_find(e->elevator_name));
        list_add_tail(&e->list, &elv_list);
        spin_unlock_irq(&elv_list_lock);

        printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
        if (!strcmp(e->elevator_name, chosen_elevator) ||
            (!*chosen_elevator &&
             !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
                printk(" (default)");
        printk("\n");
        return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
        struct task_struct *g, *p;

        /*
         * Iterate over every thread in the system and trim its io context,
         * so no stale scheduler data is left behind.
         */
        if (e->ops.trim) {
                read_lock(&tasklist_lock);
                do_each_thread(g, p) {
                        task_lock(p);
                        e->ops.trim(p->io_context);
                        task_unlock(p);
                } while_each_thread(g, p);
                read_unlock(&tasklist_lock);
        }

        spin_lock_irq(&elv_list_lock);
        list_del_init(&e->list);
        spin_unlock_irq(&elv_list_lock);
}
EXPORT_SYMBOL_GPL(elv_unregister);

/*
 * switch to new_e io scheduler.  be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we
 * need for the new one.  this way we have a chance of going back to the
 * old one, if the new one fails init for some reason.
 */
static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
{
        elevator_t *old_elevator, *e;

        /*
         * Allocate new elevator
         */
        e = elevator_alloc(new_e);
        if (!e)
                return 0;

        /*
         * Turn on BYPASS and drain all requests w/ elevator private data
         */
        spin_lock_irq(q->queue_lock);

        set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);

        elv_drain_elevator(q);

        while (q->rq.elvpriv) {
                blk_remove_plug(q);
                q->request_fn(q);
                spin_unlock_irq(q->queue_lock);
                msleep(10);
                spin_lock_irq(q->queue_lock);
                elv_drain_elevator(q);
        }

        spin_unlock_irq(q->queue_lock);

        /*
         * unregister old elevator data
         */
        elv_unregister_queue(q);
        old_elevator = q->elevator;

        /*
         * attach and start new elevator
         */
        if (elevator_attach(q, e))
                goto fail;

        if (elv_register_queue(q))
                goto fail_register;

        /*
         * finally exit old elevator and turn off BYPASS.
         */
        elevator_exit(old_elevator);
        clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
        return 1;

fail_register:
        /*
         * switch failed, exit the new io scheduler and reattach the old
         * one again (along with re-adding the sysfs dir)
         */
        elevator_exit(e);
        e = NULL;
fail:
        q->elevator = old_elevator;
        elv_register_queue(q);
        clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
        if (e)
                kobject_put(&e->kobj);
        return 0;
}
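
/*
 * sysfs "store" handler for the queue's scheduler attribute.  Typical use
 * from userspace (the device name is just an example):
 *
 *      # echo deadline > /sys/block/hda/queue/scheduler
 *
 * A trailing newline from echo is stripped before the lookup.
 */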
ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
{
        char elevator_name[ELV_NAME_MAX];
        size_t len;
        struct elevator_type *e;

        elevator_name[sizeof(elevator_name) - 1] = '\0';
        strncpy(elevator_name, name, sizeof(elevator_name) - 1);
        len = strlen(elevator_name);

        if (len && elevator_name[len - 1] == '\n')
                elevator_name[len - 1] = '\0';

        e = elevator_get(elevator_name);
        if (!e) {
                printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
                return -EINVAL;
        }

        if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
                elevator_put(e);
                return count;
        }

        if (!elevator_switch(q, e))
                printk(KERN_ERR "elevator: switch to %s failed\n", elevator_name);
        return count;
}
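
/*
 * sysfs "show" handler: list the registered schedulers with the active
 * one in brackets, e.g. "noop [cfq]".
 */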
ssize_t elv_iosched_show(request_queue_t *q, char *name)
{
        elevator_t *e = q->elevator;
        struct elevator_type *elv = e->elevator_type;
        struct list_head *entry;
        int len = 0;

        spin_lock_irq(q->queue_lock);
        list_for_each(entry, &elv_list) {
                struct elevator_type *__e;

                __e = list_entry(entry, struct elevator_type, list);
                if (!strcmp(elv->elevator_name, __e->elevator_name))
                        len += sprintf(name+len, "[%s] ", elv->elevator_name);
                else
                        len += sprintf(name+len, "%s ", __e->elevator_name);
        }
        spin_unlock_irq(q->queue_lock);

        len += sprintf(len+name, "\n");
        return len;
}

EXPORT_SYMBOL(elv_dispatch_sort);
EXPORT_SYMBOL(elv_add_request);
EXPORT_SYMBOL(__elv_add_request);
EXPORT_SYMBOL(elv_next_request);
EXPORT_SYMBOL(elv_dequeue_request);
EXPORT_SYMBOL(elv_queue_empty);
EXPORT_SYMBOL(elevator_exit);
EXPORT_SYMBOL(elevator_init);