cfq-iosched.c

  1. /*
  2. * linux/drivers/block/cfq-iosched.c
  3. *
  4. * CFQ, or complete fairness queueing, disk scheduler.
  5. *
  6. * Based on ideas from a previously unfinished io
  7. * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
  8. *
  9. * Copyright (C) 2003 Jens Axboe <axboe@suse.de>
  10. */
  11. #include <linux/kernel.h>
  12. #include <linux/fs.h>
  13. #include <linux/blkdev.h>
  14. #include <linux/elevator.h>
  15. #include <linux/bio.h>
  16. #include <linux/config.h>
  17. #include <linux/module.h>
  18. #include <linux/slab.h>
  19. #include <linux/init.h>
  20. #include <linux/compiler.h>
  21. #include <linux/hash.h>
  22. #include <linux/rbtree.h>
  23. #include <linux/mempool.h>
  24. static unsigned long max_elapsed_crq;
  25. static unsigned long max_elapsed_dispatch;
  26. /*
  27. * tunables
  28. */
  29. static int cfq_quantum = 4; /* max queue in one round of service */
  30. static int cfq_queued = 8; /* minimum rq allocate limit per-queue */
  31. static int cfq_service = HZ; /* period over which service is avg */
  32. static int cfq_fifo_expire_r = HZ / 2; /* fifo timeout for sync requests */
  33. static int cfq_fifo_expire_w = 5 * HZ; /* fifo timeout for async requests */
  34. static int cfq_fifo_rate = HZ / 8; /* fifo expiry rate */
  35. static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
  36. static int cfq_back_penalty = 2; /* penalty of a backwards seek */
  37. /*
  38. * for the hash of cfqq inside the cfqd
  39. */
  40. #define CFQ_QHASH_SHIFT 6
  41. #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT)
  42. #define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash)
  43. /*
  44. * for the hash of crq inside the cfqq
  45. */
  46. #define CFQ_MHASH_SHIFT 6
  47. #define CFQ_MHASH_BLOCK(sec) ((sec) >> 3)
  48. #define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT)
  49. #define CFQ_MHASH_FN(sec) hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
  50. #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
  51. #define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash)
  52. #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list)
  53. #define RQ_DATA(rq) (rq)->elevator_private
  54. /*
  55. * rb-tree defines
  56. */
  57. #define RB_NONE (2)
  58. #define RB_EMPTY(node) ((node)->rb_node == NULL)
  59. #define RB_CLEAR_COLOR(node) (node)->rb_color = RB_NONE
  60. #define RB_CLEAR(node) do { \
  61. (node)->rb_parent = NULL; \
  62. RB_CLEAR_COLOR((node)); \
  63. (node)->rb_right = NULL; \
  64. (node)->rb_left = NULL; \
  65. } while (0)
  66. #define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL)
  67. #define ON_RB(node) ((node)->rb_color != RB_NONE)
  68. #define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
  69. #define rq_rb_key(rq) (rq)->sector
  70. /*
  71. * threshold for switching off non-tag accounting
  72. */
  73. #define CFQ_MAX_TAG (4)
  74. /*
  75. * sort key types and names
  76. */
  77. enum {
  78. CFQ_KEY_PGID,
  79. CFQ_KEY_TGID,
  80. CFQ_KEY_UID,
  81. CFQ_KEY_GID,
  82. CFQ_KEY_LAST,
  83. };
  84. static char *cfq_key_types[] = { "pgid", "tgid", "uid", "gid", NULL };
  85. static kmem_cache_t *crq_pool;
  86. static kmem_cache_t *cfq_pool;
  87. static kmem_cache_t *cfq_ioc_pool;
  88. struct cfq_data {
  89. struct list_head rr_list;
  90. struct list_head empty_list;
  91. struct hlist_head *cfq_hash;
  92. struct hlist_head *crq_hash;
  93. /* queues on rr_list (ie they have pending requests) */
  94. unsigned int busy_queues;
  95. unsigned int max_queued;
  96. atomic_t ref;
  97. int key_type;
  98. mempool_t *crq_pool;
  99. request_queue_t *queue;
  100. sector_t last_sector;
  101. int rq_in_driver;
  102. /*
  103. * tunables, see top of file
  104. */
  105. unsigned int cfq_quantum;
  106. unsigned int cfq_queued;
  107. unsigned int cfq_fifo_expire_r;
  108. unsigned int cfq_fifo_expire_w;
  109. unsigned int cfq_fifo_batch_expire;
  110. unsigned int cfq_back_penalty;
  111. unsigned int cfq_back_max;
  112. unsigned int find_best_crq;
  113. unsigned int cfq_tagged;
  114. };
  115. struct cfq_queue {
  116. /* reference count */
  117. atomic_t ref;
  118. /* parent cfq_data */
  119. struct cfq_data *cfqd;
  120. /* hash node for cfqq lookup (cfqd->cfq_hash) */
  121. struct hlist_node cfq_hash;
  122. /* hash key */
  123. unsigned long key;
  124. /* whether queue is on rr (or empty) list */
  125. int on_rr;
  126. /* on either rr or empty list of cfqd */
  127. struct list_head cfq_list;
  128. /* sorted list of pending requests */
  129. struct rb_root sort_list;
  130. /* if fifo isn't expired, next request to serve */
  131. struct cfq_rq *next_crq;
  132. /* requests queued in sort_list */
  133. int queued[2];
  134. /* currently allocated requests */
  135. int allocated[2];
  136. /* fifo list of requests in sort_list */
  137. struct list_head fifo[2];
  138. /* last time fifo expired */
  139. unsigned long last_fifo_expire;
  140. int key_type;
  141. unsigned long service_start;
  142. unsigned long service_used;
  143. unsigned int max_rate;
  144. /* number of requests that have been handed to the driver */
  145. int in_flight;
  146. /* per-direction allocation limit, recorded when an allocation is refused */
  147. int alloc_limit[2];
  148. };
  149. struct cfq_rq {
  150. struct rb_node rb_node;
  151. sector_t rb_key;
  152. struct request *request;
  153. struct hlist_node hash;
  154. struct cfq_queue *cfq_queue;
  155. struct cfq_io_context *io_context;
  156. unsigned long service_start;
  157. unsigned long queue_start;
  158. unsigned int in_flight : 1;
  159. unsigned int accounted : 1;
  160. unsigned int is_sync : 1;
  161. unsigned int is_write : 1;
  162. };
  163. static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned long);
  164. static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
  165. static void cfq_update_next_crq(struct cfq_rq *);
  166. static void cfq_put_cfqd(struct cfq_data *cfqd);
  167. /*
  168. * what the fairness is based on (ie how processes are grouped and
  169. * differentiated)
  170. */
  171. static inline unsigned long
  172. cfq_hash_key(struct cfq_data *cfqd, struct task_struct *tsk)
  173. {
  174. /*
  175. * optimize this so that ->key_type is the offset into the struct
  176. */
  177. switch (cfqd->key_type) {
  178. case CFQ_KEY_PGID:
  179. return process_group(tsk);
  180. default:
  181. case CFQ_KEY_TGID:
  182. return tsk->tgid;
  183. case CFQ_KEY_UID:
  184. return tsk->uid;
  185. case CFQ_KEY_GID:
  186. return tsk->gid;
  187. }
  188. }
  189. /*
  190. * lots of deadline iosched dupes, can be abstracted later...
  191. */
  192. static inline void cfq_del_crq_hash(struct cfq_rq *crq)
  193. {
  194. hlist_del_init(&crq->hash);
  195. }
  196. static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
  197. {
  198. cfq_del_crq_hash(crq);
  199. if (q->last_merge == crq->request)
  200. q->last_merge = NULL;
  201. cfq_update_next_crq(crq);
  202. }
  203. static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
  204. {
  205. const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
  206. BUG_ON(!hlist_unhashed(&crq->hash));
  207. hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
  208. }
  209. static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
  210. {
  211. struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
  212. struct hlist_node *entry, *next;
  213. hlist_for_each_safe(entry, next, hash_list) {
  214. struct cfq_rq *crq = list_entry_hash(entry);
  215. struct request *__rq = crq->request;
  216. BUG_ON(hlist_unhashed(&crq->hash));
  217. if (!rq_mergeable(__rq)) {
  218. cfq_del_crq_hash(crq);
  219. continue;
  220. }
  221. if (rq_hash_key(__rq) == offset)
  222. return __rq;
  223. }
  224. return NULL;
  225. }
  226. /*
  227. * Lifted from AS - choose which of crq1 and crq2 is best served now.
  228. * We choose the request that is closest to the head right now. Distance
  229. * behind the head is penalized and only allowed to a certain extent.
  230. */
  231. static struct cfq_rq *
  232. cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
  233. {
  234. sector_t last, s1, s2, d1 = 0, d2 = 0;
  235. int r1_wrap = 0, r2_wrap = 0; /* requests are behind the disk head */
  236. unsigned long back_max;
  237. if (crq1 == NULL || crq1 == crq2)
  238. return crq2;
  239. if (crq2 == NULL)
  240. return crq1;
  241. s1 = crq1->request->sector;
  242. s2 = crq2->request->sector;
  243. last = cfqd->last_sector;
  244. #if 0
  245. if (!list_empty(&cfqd->queue->queue_head)) {
  246. struct list_head *entry = &cfqd->queue->queue_head;
  247. unsigned long distance = ~0UL;
  248. struct request *rq;
  249. while ((entry = entry->prev) != &cfqd->queue->queue_head) {
  250. rq = list_entry_rq(entry);
  251. if (blk_barrier_rq(rq))
  252. break;
  253. if (distance < abs(s1 - rq->sector + rq->nr_sectors)) {
  254. distance = abs(s1 - rq->sector +rq->nr_sectors);
  255. last = rq->sector + rq->nr_sectors;
  256. }
  257. if (distance < abs(s2 - rq->sector + rq->nr_sectors)) {
  258. distance = abs(s2 - rq->sector +rq->nr_sectors);
  259. last = rq->sector + rq->nr_sectors;
  260. }
  261. }
  262. }
  263. #endif
  264. /*
  265. * by definition, 1KiB is 2 sectors
  266. */
  267. back_max = cfqd->cfq_back_max * 2;
  268. /*
  269. * Strict one way elevator _except_ in the case where we allow
  270. * short backward seeks which are biased as twice the cost of a
  271. * similar forward seek.
  272. */
  273. if (s1 >= last)
  274. d1 = s1 - last;
  275. else if (s1 + back_max >= last)
  276. d1 = (last - s1) * cfqd->cfq_back_penalty;
  277. else
  278. r1_wrap = 1;
  279. if (s2 >= last)
  280. d2 = s2 - last;
  281. else if (s2 + back_max >= last)
  282. d2 = (last - s2) * cfqd->cfq_back_penalty;
  283. else
  284. r2_wrap = 1;
  285. /* Found required data */
  286. if (!r1_wrap && r2_wrap)
  287. return crq1;
  288. else if (!r2_wrap && r1_wrap)
  289. return crq2;
  290. else if (r1_wrap && r2_wrap) {
  291. /* both behind the head */
  292. if (s1 <= s2)
  293. return crq1;
  294. else
  295. return crq2;
  296. }
  297. /* Both requests in front of the head */
  298. if (d1 < d2)
  299. return crq1;
  300. else if (d2 < d1)
  301. return crq2;
  302. else {
  303. if (s1 >= s2)
  304. return crq1;
  305. else
  306. return crq2;
  307. }
  308. }
  309. /*
  310. * would be nice to take fifo expire time into account as well
  311. */
  312. static struct cfq_rq *
  313. cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
  314. struct cfq_rq *last)
  315. {
  316. struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
  317. struct rb_node *rbnext, *rbprev;
  318. if (!ON_RB(&last->rb_node))
  319. return NULL;
  320. if ((rbnext = rb_next(&last->rb_node)) == NULL)
  321. rbnext = rb_first(&cfqq->sort_list);
  322. rbprev = rb_prev(&last->rb_node);
  323. if (rbprev)
  324. crq_prev = rb_entry_crq(rbprev);
  325. if (rbnext)
  326. crq_next = rb_entry_crq(rbnext);
  327. return cfq_choose_req(cfqd, crq_next, crq_prev);
  328. }
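/*
 * if crq is the cached next-serve candidate of its queue, pick a
 * replacement from its rb-tree neighbours
 */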
  329. static void cfq_update_next_crq(struct cfq_rq *crq)
  330. {
  331. struct cfq_queue *cfqq = crq->cfq_queue;
  332. if (cfqq->next_crq == crq)
  333. cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
  334. }
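/*
 * return 1 if cfqq is no longer in service_used order relative to its
 * rr_list neighbours and needs to be re-sorted
 */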
  335. static int cfq_check_sort_rr_list(struct cfq_queue *cfqq)
  336. {
  337. struct list_head *head = &cfqq->cfqd->rr_list;
  338. struct list_head *next, *prev;
  339. /*
  340. * list might still be ordered
  341. */
  342. next = cfqq->cfq_list.next;
  343. if (next != head) {
  344. struct cfq_queue *cnext = list_entry_cfqq(next);
  345. if (cfqq->service_used > cnext->service_used)
  346. return 1;
  347. }
  348. prev = cfqq->cfq_list.prev;
  349. if (prev != head) {
  350. struct cfq_queue *cprev = list_entry_cfqq(prev);
  351. if (cfqq->service_used < cprev->service_used)
  352. return 1;
  353. }
  354. return 0;
  355. }
  356. static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
  357. {
  358. struct list_head *entry = &cfqq->cfqd->rr_list;
  359. if (!cfqq->on_rr)
  360. return;
  361. if (!new_queue && !cfq_check_sort_rr_list(cfqq))
  362. return;
  363. list_del(&cfqq->cfq_list);
  364. /*
  365. * sort by our mean service_used, sub-sort by in-flight requests
  366. */
  367. while ((entry = entry->prev) != &cfqq->cfqd->rr_list) {
  368. struct cfq_queue *__cfqq = list_entry_cfqq(entry);
  369. if (cfqq->service_used > __cfqq->service_used)
  370. break;
  371. else if (cfqq->service_used == __cfqq->service_used) {
  372. struct list_head *prv;
  373. while ((prv = entry->prev) != &cfqq->cfqd->rr_list) {
  374. __cfqq = list_entry_cfqq(prv);
  375. WARN_ON(__cfqq->service_used > cfqq->service_used);
  376. if (cfqq->service_used != __cfqq->service_used)
  377. break;
  378. if (cfqq->in_flight > __cfqq->in_flight)
  379. break;
  380. entry = prv;
  381. }
  382. }
  383. }
  384. list_add(&cfqq->cfq_list, entry);
  385. }
  386. /*
  387. * add to busy list of queues for service, trying to be fair in ordering
  388. * the pending list according to requests serviced
  389. */
  390. static inline void
  391. cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
  392. {
  393. /*
  394. * it's currently on the empty list
  395. */
  396. cfqq->on_rr = 1;
  397. cfqd->busy_queues++;
  398. if (time_after(jiffies, cfqq->service_start + cfq_service))
  399. cfqq->service_used >>= 3;
  400. cfq_sort_rr_list(cfqq, 1);
  401. }
  402. static inline void
  403. cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
  404. {
  405. list_move(&cfqq->cfq_list, &cfqd->empty_list);
  406. cfqq->on_rr = 0;
  407. BUG_ON(!cfqd->busy_queues);
  408. cfqd->busy_queues--;
  409. }
  410. /*
  411. * rb tree support functions
  412. */
  413. static inline void cfq_del_crq_rb(struct cfq_rq *crq)
  414. {
  415. struct cfq_queue *cfqq = crq->cfq_queue;
  416. if (ON_RB(&crq->rb_node)) {
  417. struct cfq_data *cfqd = cfqq->cfqd;
  418. BUG_ON(!cfqq->queued[crq->is_sync]);
  419. cfq_update_next_crq(crq);
  420. cfqq->queued[crq->is_sync]--;
  421. rb_erase(&crq->rb_node, &cfqq->sort_list);
  422. RB_CLEAR_COLOR(&crq->rb_node);
  423. if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr)
  424. cfq_del_cfqq_rr(cfqd, cfqq);
  425. }
  426. }
  427. static struct cfq_rq *
  428. __cfq_add_crq_rb(struct cfq_rq *crq)
  429. {
  430. struct rb_node **p = &crq->cfq_queue->sort_list.rb_node;
  431. struct rb_node *parent = NULL;
  432. struct cfq_rq *__crq;
  433. while (*p) {
  434. parent = *p;
  435. __crq = rb_entry_crq(parent);
  436. if (crq->rb_key < __crq->rb_key)
  437. p = &(*p)->rb_left;
  438. else if (crq->rb_key > __crq->rb_key)
  439. p = &(*p)->rb_right;
  440. else
  441. return __crq;
  442. }
  443. rb_link_node(&crq->rb_node, parent, p);
  444. return NULL;
  445. }
  446. static void cfq_add_crq_rb(struct cfq_rq *crq)
  447. {
  448. struct cfq_queue *cfqq = crq->cfq_queue;
  449. struct cfq_data *cfqd = cfqq->cfqd;
  450. struct request *rq = crq->request;
  451. struct cfq_rq *__alias;
  452. crq->rb_key = rq_rb_key(rq);
  453. cfqq->queued[crq->is_sync]++;
  454. /*
  455. * looks a little odd, but the first insert might return an alias.
  456. * if that happens, put the alias on the dispatch list
  457. */
  458. while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
  459. cfq_dispatch_sort(cfqd->queue, __alias);
  460. rb_insert_color(&crq->rb_node, &cfqq->sort_list);
  461. if (!cfqq->on_rr)
  462. cfq_add_cfqq_rr(cfqd, cfqq);
  463. /*
  464. * check if this request is a better next-serve candidate
  465. */
  466. cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq);
  467. }
  468. static inline void
  469. cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
  470. {
  471. if (ON_RB(&crq->rb_node)) {
  472. rb_erase(&crq->rb_node, &cfqq->sort_list);
  473. cfqq->queued[crq->is_sync]--;
  474. }
  475. cfq_add_crq_rb(crq);
  476. }
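/*
 * look up a queued request by sector in the current task's queue, used
 * for front merge checks
 */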
  477. static struct request *
  478. cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
  479. {
  480. const unsigned long key = cfq_hash_key(cfqd, current);
  481. struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, key);
  482. struct rb_node *n;
  483. if (!cfqq)
  484. goto out;
  485. n = cfqq->sort_list.rb_node;
  486. while (n) {
  487. struct cfq_rq *crq = rb_entry_crq(n);
  488. if (sector < crq->rb_key)
  489. n = n->rb_left;
  490. else if (sector > crq->rb_key)
  491. n = n->rb_right;
  492. else
  493. return crq->request;
  494. }
  495. out:
  496. return NULL;
  497. }
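/*
 * a request handed to the driver is being pulled back off the dispatch
 * list, so undo the dispatch accounting
 */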
  498. static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
  499. {
  500. struct cfq_rq *crq = RQ_DATA(rq);
  501. if (crq) {
  502. struct cfq_queue *cfqq = crq->cfq_queue;
  503. if (cfqq->cfqd->cfq_tagged) {
  504. cfqq->service_used--;
  505. cfq_sort_rr_list(cfqq, 0);
  506. }
  507. if (crq->accounted) {
  508. crq->accounted = 0;
  509. cfqq->cfqd->rq_in_driver--;
  510. }
  511. }
  512. }
  513. /*
  514. * make sure the service time gets corrected on reissue of this request
  515. */
  516. static void cfq_requeue_request(request_queue_t *q, struct request *rq)
  517. {
  518. cfq_deactivate_request(q, rq);
  519. list_add(&rq->queuelist, &q->queue_head);
  520. }
  521. static void cfq_remove_request(request_queue_t *q, struct request *rq)
  522. {
  523. struct cfq_rq *crq = RQ_DATA(rq);
  524. if (crq) {
  525. cfq_remove_merge_hints(q, crq);
  526. list_del_init(&rq->queuelist);
  527. if (crq->cfq_queue)
  528. cfq_del_crq_rb(crq);
  529. }
  530. }
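/*
 * see if bio can be merged with a pending request: check last_merge
 * first, then the back merge hash, then the front merge rb-tree
 */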
  531. static int
  532. cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
  533. {
  534. struct cfq_data *cfqd = q->elevator->elevator_data;
  535. struct request *__rq;
  536. int ret;
  537. ret = elv_try_last_merge(q, bio);
  538. if (ret != ELEVATOR_NO_MERGE) {
  539. __rq = q->last_merge;
  540. goto out_insert;
  541. }
  542. __rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
  543. if (__rq) {
  544. BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
  545. if (elv_rq_merge_ok(__rq, bio)) {
  546. ret = ELEVATOR_BACK_MERGE;
  547. goto out;
  548. }
  549. }
  550. __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
  551. if (__rq) {
  552. if (elv_rq_merge_ok(__rq, bio)) {
  553. ret = ELEVATOR_FRONT_MERGE;
  554. goto out;
  555. }
  556. }
  557. return ELEVATOR_NO_MERGE;
  558. out:
  559. q->last_merge = __rq;
  560. out_insert:
  561. *req = __rq;
  562. return ret;
  563. }
  564. static void cfq_merged_request(request_queue_t *q, struct request *req)
  565. {
  566. struct cfq_data *cfqd = q->elevator->elevator_data;
  567. struct cfq_rq *crq = RQ_DATA(req);
  568. cfq_del_crq_hash(crq);
  569. cfq_add_crq_hash(cfqd, crq);
  570. if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
  571. struct cfq_queue *cfqq = crq->cfq_queue;
  572. cfq_update_next_crq(crq);
  573. cfq_reposition_crq_rb(cfqq, crq);
  574. }
  575. q->last_merge = req;
  576. }
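/*
 * rq and next have been merged; let rq inherit the earlier fifo position
 * and queue_start, then remove next
 */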
  577. static void
  578. cfq_merged_requests(request_queue_t *q, struct request *rq,
  579. struct request *next)
  580. {
  581. struct cfq_rq *crq = RQ_DATA(rq);
  582. struct cfq_rq *cnext = RQ_DATA(next);
  583. cfq_merged_request(q, rq);
  584. if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) {
  585. if (time_before(cnext->queue_start, crq->queue_start)) {
  586. list_move(&rq->queuelist, &next->queuelist);
  587. crq->queue_start = cnext->queue_start;
  588. }
  589. }
  590. cfq_update_next_crq(cnext);
  591. cfq_remove_request(q, next);
  592. }
  593. /*
  594. * we dispatch cfqd->cfq_quantum requests in total from the rr_list queues;
  595. * this function sector-sorts the selected request to minimize seeks. we start
  596. * at cfqd->last_sector, not 0.
  597. */
  598. static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
  599. {
  600. struct cfq_data *cfqd = q->elevator->elevator_data;
  601. struct cfq_queue *cfqq = crq->cfq_queue;
  602. struct list_head *head = &q->queue_head, *entry = head;
  603. struct request *__rq;
  604. sector_t last;
  605. cfq_del_crq_rb(crq);
  606. cfq_remove_merge_hints(q, crq);
  607. list_del(&crq->request->queuelist);
  608. last = cfqd->last_sector;
  609. while ((entry = entry->prev) != head) {
  610. __rq = list_entry_rq(entry);
  611. if (blk_barrier_rq(crq->request))
  612. break;
  613. if (!blk_fs_request(crq->request))
  614. break;
  615. if (crq->request->sector > __rq->sector)
  616. break;
  617. if (__rq->sector > last && crq->request->sector < last) {
  618. last = crq->request->sector;
  619. break;
  620. }
  621. }
  622. cfqd->last_sector = last;
  623. crq->in_flight = 1;
  624. cfqq->in_flight++;
  625. list_add(&crq->request->queuelist, entry);
  626. }
  627. /*
  628. * return expired entry, or NULL to just start from scratch in rbtree
  629. */
  630. static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
  631. {
  632. struct cfq_data *cfqd = cfqq->cfqd;
  633. const int reads = !list_empty(&cfqq->fifo[0]);
  634. const int writes = !list_empty(&cfqq->fifo[1]);
  635. unsigned long now = jiffies;
  636. struct cfq_rq *crq;
  637. if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire))
  638. return NULL;
  639. crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist));
  640. if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
  641. cfqq->last_fifo_expire = now;
  642. return crq;
  643. }
  644. crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist));
  645. if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
  646. cfqq->last_fifo_expire = now;
  647. return crq;
  648. }
  649. return NULL;
  650. }
  651. /*
  652. * dispatch a single request from given queue
  653. */
  654. static inline void
  655. cfq_dispatch_request(request_queue_t *q, struct cfq_data *cfqd,
  656. struct cfq_queue *cfqq)
  657. {
  658. struct cfq_rq *crq;
  659. /*
  660. * follow the expired path, else grab the next available request
  661. */
  662. if ((crq = cfq_check_fifo(cfqq)) == NULL) {
  663. if (cfqd->find_best_crq)
  664. crq = cfqq->next_crq;
  665. else
  666. crq = rb_entry_crq(rb_first(&cfqq->sort_list));
  667. }
  668. cfqd->last_sector = crq->request->sector + crq->request->nr_sectors;
  669. /*
  670. * finally, insert request into driver list
  671. */
  672. cfq_dispatch_sort(q, crq);
  673. }
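/*
 * move requests from the busy queues to the dispatch list, one per queue
 * per round. the first round skips queues that already have io in flight;
 * we keep going until max_dispatch requests are moved or no busy queues
 * remain
 */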
  674. static int cfq_dispatch_requests(request_queue_t *q, int max_dispatch)
  675. {
  676. struct cfq_data *cfqd = q->elevator->elevator_data;
  677. struct cfq_queue *cfqq;
  678. struct list_head *entry, *tmp;
  679. int queued, busy_queues, first_round;
  680. if (list_empty(&cfqd->rr_list))
  681. return 0;
  682. queued = 0;
  683. first_round = 1;
  684. restart:
  685. busy_queues = 0;
  686. list_for_each_safe(entry, tmp, &cfqd->rr_list) {
  687. cfqq = list_entry_cfqq(entry);
  688. BUG_ON(RB_EMPTY(&cfqq->sort_list));
  689. /*
  690. * first round of queueing, only select from queues that
  691. * don't already have io in-flight
  692. */
  693. if (first_round && cfqq->in_flight)
  694. continue;
  695. cfq_dispatch_request(q, cfqd, cfqq);
  696. if (!RB_EMPTY(&cfqq->sort_list))
  697. busy_queues++;
  698. queued++;
  699. }
  700. if ((queued < max_dispatch) && (busy_queues || first_round)) {
  701. first_round = 0;
  702. goto restart;
  703. }
  704. return queued;
  705. }
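/*
 * accounting done when a request is handed to the driver; also switches
 * on tagged accounting once the driver depth reaches CFQ_MAX_TAG
 */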
  706. static inline void cfq_account_dispatch(struct cfq_rq *crq)
  707. {
  708. struct cfq_queue *cfqq = crq->cfq_queue;
  709. struct cfq_data *cfqd = cfqq->cfqd;
  710. unsigned long now, elapsed;
  711. if (!blk_fs_request(crq->request))
  712. return;
  713. /*
  714. * accounted bit is necessary since some drivers will call
  715. * elv_next_request() many times for the same request (eg ide)
  716. */
  717. if (crq->accounted)
  718. return;
  719. now = jiffies;
  720. if (cfqq->service_start == ~0UL)
  721. cfqq->service_start = now;
  722. /*
  723. * on drives with tagged command queueing, command turn-around time
  724. * doesn't necessarily reflect the time spent processing this very
  725. * command inside the drive. so do the accounting differently there,
  726. * by just sorting on the number of requests
  727. */
  728. if (cfqd->cfq_tagged) {
  729. if (time_after(now, cfqq->service_start + cfq_service)) {
  730. cfqq->service_start = now;
  731. cfqq->service_used /= 10;
  732. }
  733. cfqq->service_used++;
  734. cfq_sort_rr_list(cfqq, 0);
  735. }
  736. elapsed = now - crq->queue_start;
  737. if (elapsed > max_elapsed_dispatch)
  738. max_elapsed_dispatch = elapsed;
  739. crq->accounted = 1;
  740. crq->service_start = now;
  741. if (++cfqd->rq_in_driver >= CFQ_MAX_TAG && !cfqd->cfq_tagged) {
  742. cfqq->cfqd->cfq_tagged = 1;
  743. printk("cfq: depth %d reached, tagging now on\n", CFQ_MAX_TAG);
  744. }
  745. }
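/*
 * completion side of the accounting: drop the driver count and, in
 * non-tagged mode, charge the service time to the queue and re-sort it
 */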
  746. static inline void
  747. cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
  748. {
  749. struct cfq_data *cfqd = cfqq->cfqd;
  750. if (!crq->accounted)
  751. return;
  752. WARN_ON(!cfqd->rq_in_driver);
  753. cfqd->rq_in_driver--;
  754. if (!cfqd->cfq_tagged) {
  755. unsigned long now = jiffies;
  756. unsigned long duration = now - crq->service_start;
  757. if (time_after(now, cfqq->service_start + cfq_service)) {
  758. cfqq->service_start = now;
  759. cfqq->service_used >>= 3;
  760. }
  761. cfqq->service_used += duration;
  762. cfq_sort_rr_list(cfqq, 0);
  763. if (duration > max_elapsed_crq)
  764. max_elapsed_crq = duration;
  765. }
  766. }
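/*
 * return the head of the dispatch list if it isn't empty, else refill it
 * from the rr_list and try again
 */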
  767. static struct request *cfq_next_request(request_queue_t *q)
  768. {
  769. struct cfq_data *cfqd = q->elevator->elevator_data;
  770. struct request *rq;
  771. if (!list_empty(&q->queue_head)) {
  772. struct cfq_rq *crq;
  773. dispatch:
  774. rq = list_entry_rq(q->queue_head.next);
  775. if ((crq = RQ_DATA(rq)) != NULL) {
  776. cfq_remove_merge_hints(q, crq);
  777. cfq_account_dispatch(crq);
  778. }
  779. return rq;
  780. }
  781. if (cfq_dispatch_requests(q, cfqd->cfq_quantum))
  782. goto dispatch;
  783. return NULL;
  784. }
  785. /*
  786. * task holds one reference to the queue, dropped when task exits. each crq
  787. * in-flight on this queue also holds a reference, dropped when crq is freed.
  788. *
  789. * queue lock must be held here.
  790. */
  791. static void cfq_put_queue(struct cfq_queue *cfqq)
  792. {
  793. BUG_ON(!atomic_read(&cfqq->ref));
  794. if (!atomic_dec_and_test(&cfqq->ref))
  795. return;
  796. BUG_ON(rb_first(&cfqq->sort_list));
  797. BUG_ON(cfqq->on_rr);
  798. cfq_put_cfqd(cfqq->cfqd);
  799. /*
  800. * it's on the empty list and still hashed
  801. */
  802. list_del(&cfqq->cfq_list);
  803. hlist_del(&cfqq->cfq_hash);
  804. kmem_cache_free(cfq_pool, cfqq);
  805. }
  806. static inline struct cfq_queue *
  807. __cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned long key, const int hashval)
  808. {
  809. struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
  810. struct hlist_node *entry, *next;
  811. hlist_for_each_safe(entry, next, hash_list) {
  812. struct cfq_queue *__cfqq = list_entry_qhash(entry);
  813. if (__cfqq->key == key)
  814. return __cfqq;
  815. }
  816. return NULL;
  817. }
  818. static struct cfq_queue *
  819. cfq_find_cfq_hash(struct cfq_data *cfqd, unsigned long key)
  820. {
  821. return __cfq_find_cfq_hash(cfqd, key, hash_long(key, CFQ_QHASH_SHIFT));
  822. }
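/*
 * the fairness key type was changed via sysfs; rehash cfqq under its new
 * key, switching to an existing queue for that key if one is found
 */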
  823. static inline void
  824. cfq_rehash_cfqq(struct cfq_data *cfqd, struct cfq_queue **cfqq,
  825. struct cfq_io_context *cic)
  826. {
  827. unsigned long hashkey = cfq_hash_key(cfqd, current);
  828. unsigned long hashval = hash_long(hashkey, CFQ_QHASH_SHIFT);
  829. struct cfq_queue *__cfqq;
  830. unsigned long flags;
  831. spin_lock_irqsave(cfqd->queue->queue_lock, flags);
  832. hlist_del(&(*cfqq)->cfq_hash);
  833. __cfqq = __cfq_find_cfq_hash(cfqd, hashkey, hashval);
  834. if (!__cfqq || __cfqq == *cfqq) {
  835. __cfqq = *cfqq;
  836. hlist_add_head(&__cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
  837. __cfqq->key_type = cfqd->key_type;
  838. } else {
  839. atomic_inc(&__cfqq->ref);
  840. cic->cfqq = __cfqq;
  841. cfq_put_queue(*cfqq);
  842. *cfqq = __cfqq;
  843. }
  844. cic->cfqq = __cfqq;
  845. spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
  846. }
  847. static void cfq_free_io_context(struct cfq_io_context *cic)
  848. {
  849. kmem_cache_free(cfq_ioc_pool, cic);
  850. }
  851. /*
  852. * locking hierarchy is: io_context lock -> queue locks
  853. */
  854. static void cfq_exit_io_context(struct cfq_io_context *cic)
  855. {
  856. struct cfq_queue *cfqq = cic->cfqq;
  857. struct list_head *entry = &cic->list;
  858. request_queue_t *q;
  859. unsigned long flags;
  860. /*
  861. * put the reference this task is holding to the various queues
  862. */
  863. spin_lock_irqsave(&cic->ioc->lock, flags);
  864. while ((entry = cic->list.next) != &cic->list) {
  865. struct cfq_io_context *__cic;
  866. __cic = list_entry(entry, struct cfq_io_context, list);
  867. list_del(entry);
  868. q = __cic->cfqq->cfqd->queue;
  869. spin_lock(q->queue_lock);
  870. cfq_put_queue(__cic->cfqq);
  871. spin_unlock(q->queue_lock);
  872. }
  873. q = cfqq->cfqd->queue;
  874. spin_lock(q->queue_lock);
  875. cfq_put_queue(cfqq);
  876. spin_unlock(q->queue_lock);
  877. cic->cfqq = NULL;
  878. spin_unlock_irqrestore(&cic->ioc->lock, flags);
  879. }
  880. static struct cfq_io_context *cfq_alloc_io_context(int gfp_flags)
  881. {
  882. struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_flags);
  883. if (cic) {
  884. cic->dtor = cfq_free_io_context;
  885. cic->exit = cfq_exit_io_context;
  886. INIT_LIST_HEAD(&cic->list);
  887. cic->cfqq = NULL;
  888. }
  889. return cic;
  890. }
  891. /*
  892. * Setup general io context and cfq io context. There can be several cfq
  893. * io contexts per general io context, if this process is doing io to more
  894. * than one device managed by cfq. Note that caller is holding a reference to
  895. * cfqq, so we don't need to worry about it disappearing
  896. */
  897. static struct cfq_io_context *
  898. cfq_get_io_context(struct cfq_queue **cfqq, int gfp_flags)
  899. {
  900. struct cfq_data *cfqd = (*cfqq)->cfqd;
  901. struct cfq_queue *__cfqq = *cfqq;
  902. struct cfq_io_context *cic;
  903. struct io_context *ioc;
  904. might_sleep_if(gfp_flags & __GFP_WAIT);
  905. ioc = get_io_context(gfp_flags);
  906. if (!ioc)
  907. return NULL;
  908. if ((cic = ioc->cic) == NULL) {
  909. cic = cfq_alloc_io_context(gfp_flags);
  910. if (cic == NULL)
  911. goto err;
  912. ioc->cic = cic;
  913. cic->ioc = ioc;
  914. cic->cfqq = __cfqq;
  915. atomic_inc(&__cfqq->ref);
  916. } else {
  917. struct cfq_io_context *__cic;
  918. unsigned long flags;
  919. /*
  920. * since the first cic on the list is actually the head
  921. * itself, need to check this here or we'll duplicate a
  922. * cic per ioc for no reason
  923. */
  924. if (cic->cfqq == __cfqq)
  925. goto out;
  926. /*
  927. * cic exists, check if we already are there. linear search
  928. * should be ok here, the list will usually not be more than
  929. * 1 or a few entries long
  930. */
  931. spin_lock_irqsave(&ioc->lock, flags);
  932. list_for_each_entry(__cic, &cic->list, list) {
  933. /*
  934. * this process is already holding a reference to
  935. * this queue, so no need to get one more
  936. */
  937. if (__cic->cfqq == __cfqq) {
  938. cic = __cic;
  939. spin_unlock_irqrestore(&ioc->lock, flags);
  940. goto out;
  941. }
  942. }
  943. spin_unlock_irqrestore(&ioc->lock, flags);
  944. /*
  945. * nope, process doesn't have a cic associated with this
  946. * cfqq yet. get a new one and add to list
  947. */
  948. __cic = cfq_alloc_io_context(gfp_flags);
  949. if (__cic == NULL)
  950. goto err;
  951. __cic->ioc = ioc;
  952. __cic->cfqq = __cfqq;
  953. atomic_inc(&__cfqq->ref);
  954. spin_lock_irqsave(&ioc->lock, flags);
  955. list_add(&__cic->list, &cic->list);
  956. spin_unlock_irqrestore(&ioc->lock, flags);
  957. cic = __cic;
  958. *cfqq = __cfqq;
  959. }
  960. out:
  961. /*
  962. * if key_type has been changed on the fly, we lazily rehash
  963. * each queue at lookup time
  964. */
  965. if ((*cfqq)->key_type != cfqd->key_type)
  966. cfq_rehash_cfqq(cfqd, cfqq, cic);
  967. return cic;
  968. err:
  969. put_io_context(ioc);
  970. return NULL;
  971. }
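/*
 * find the cfq_queue for the given key, allocating a new one if none
 * exists. may drop and re-take the queue lock for a blocking allocation
 */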
  972. static struct cfq_queue *
  973. __cfq_get_queue(struct cfq_data *cfqd, unsigned long key, int gfp_mask)
  974. {
  975. const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
  976. struct cfq_queue *cfqq, *new_cfqq = NULL;
  977. retry:
  978. cfqq = __cfq_find_cfq_hash(cfqd, key, hashval);
  979. if (!cfqq) {
  980. if (new_cfqq) {
  981. cfqq = new_cfqq;
  982. new_cfqq = NULL;
  983. } else {
  984. spin_unlock_irq(cfqd->queue->queue_lock);
  985. new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
  986. spin_lock_irq(cfqd->queue->queue_lock);
  987. if (!new_cfqq && !(gfp_mask & __GFP_WAIT))
  988. goto out;
  989. goto retry;
  990. }
  991. memset(cfqq, 0, sizeof(*cfqq));
  992. INIT_HLIST_NODE(&cfqq->cfq_hash);
  993. INIT_LIST_HEAD(&cfqq->cfq_list);
  994. RB_CLEAR_ROOT(&cfqq->sort_list);
  995. INIT_LIST_HEAD(&cfqq->fifo[0]);
  996. INIT_LIST_HEAD(&cfqq->fifo[1]);
  997. cfqq->key = key;
  998. hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
  999. atomic_set(&cfqq->ref, 0);
  1000. cfqq->cfqd = cfqd;
  1001. atomic_inc(&cfqd->ref);
  1002. cfqq->key_type = cfqd->key_type;
  1003. cfqq->service_start = ~0UL;
  1004. }
  1005. if (new_cfqq)
  1006. kmem_cache_free(cfq_pool, new_cfqq);
  1007. atomic_inc(&cfqq->ref);
  1008. out:
  1009. WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
  1010. return cfqq;
  1011. }
  1012. static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq)
  1013. {
  1014. crq->is_sync = 0;
  1015. if (rq_data_dir(crq->request) == READ || current->flags & PF_SYNCWRITE)
  1016. crq->is_sync = 1;
  1017. cfq_add_crq_rb(crq);
  1018. crq->queue_start = jiffies;
  1019. list_add_tail(&crq->request->queuelist, &crq->cfq_queue->fifo[crq->is_sync]);
  1020. }
  1021. static void
  1022. cfq_insert_request(request_queue_t *q, struct request *rq, int where)
  1023. {
  1024. struct cfq_data *cfqd = q->elevator->elevator_data;
  1025. struct cfq_rq *crq = RQ_DATA(rq);
  1026. switch (where) {
  1027. case ELEVATOR_INSERT_BACK:
  1028. while (cfq_dispatch_requests(q, cfqd->cfq_quantum))
  1029. ;
  1030. list_add_tail(&rq->queuelist, &q->queue_head);
  1031. break;
  1032. case ELEVATOR_INSERT_FRONT:
  1033. list_add(&rq->queuelist, &q->queue_head);
  1034. break;
  1035. case ELEVATOR_INSERT_SORT:
  1036. BUG_ON(!blk_fs_request(rq));
  1037. cfq_enqueue(cfqd, crq);
  1038. break;
  1039. default:
  1040. printk("%s: bad insert point %d\n", __FUNCTION__, where);
  1041. return;
  1042. }
  1043. if (rq_mergeable(rq)) {
  1044. cfq_add_crq_hash(cfqd, crq);
  1045. if (!q->last_merge)
  1046. q->last_merge = rq;
  1047. }
  1048. }
  1049. static int cfq_queue_empty(request_queue_t *q)
  1050. {
  1051. struct cfq_data *cfqd = q->elevator->elevator_data;
  1052. return list_empty(&q->queue_head) && list_empty(&cfqd->rr_list);
  1053. }
  1054. static void cfq_completed_request(request_queue_t *q, struct request *rq)
  1055. {
  1056. struct cfq_rq *crq = RQ_DATA(rq);
  1057. struct cfq_queue *cfqq;
  1058. if (unlikely(!blk_fs_request(rq)))
  1059. return;
  1060. cfqq = crq->cfq_queue;
  1061. if (crq->in_flight) {
  1062. WARN_ON(!cfqq->in_flight);
  1063. cfqq->in_flight--;
  1064. }
  1065. cfq_account_completion(cfqq, crq);
  1066. }
  1067. static struct request *
  1068. cfq_former_request(request_queue_t *q, struct request *rq)
  1069. {
  1070. struct cfq_rq *crq = RQ_DATA(rq);
  1071. struct rb_node *rbprev = rb_prev(&crq->rb_node);
  1072. if (rbprev)
  1073. return rb_entry_crq(rbprev)->request;
  1074. return NULL;
  1075. }
  1076. static struct request *
  1077. cfq_latter_request(request_queue_t *q, struct request *rq)
  1078. {
  1079. struct cfq_rq *crq = RQ_DATA(rq);
  1080. struct rb_node *rbnext = rb_next(&crq->rb_node);
  1081. if (rbnext)
  1082. return rb_entry_crq(rbnext)->request;
  1083. return NULL;
  1084. }
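/*
 * may this task allocate another request? always allow up to cfq_queued
 * per queue, otherwise enforce a limit scaled by the number of busy queues
 */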
  1085. static int cfq_may_queue(request_queue_t *q, int rw)
  1086. {
  1087. struct cfq_data *cfqd = q->elevator->elevator_data;
  1088. struct cfq_queue *cfqq;
  1089. int ret = ELV_MQUEUE_MAY;
  1090. if (current->flags & PF_MEMALLOC)
  1091. return ELV_MQUEUE_MAY;
  1092. cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(cfqd, current));
  1093. if (cfqq) {
  1094. int limit = cfqd->max_queued;
  1095. if (cfqq->allocated[rw] < cfqd->cfq_queued)
  1096. return ELV_MQUEUE_MUST;
  1097. if (cfqd->busy_queues)
  1098. limit = q->nr_requests / cfqd->busy_queues;
  1099. if (limit < cfqd->cfq_queued)
  1100. limit = cfqd->cfq_queued;
  1101. else if (limit > cfqd->max_queued)
  1102. limit = cfqd->max_queued;
  1103. if (cfqq->allocated[rw] >= limit) {
  1104. if (limit > cfqq->alloc_limit[rw])
  1105. cfqq->alloc_limit[rw] = limit;
  1106. ret = ELV_MQUEUE_NO;
  1107. }
  1108. }
  1109. return ret;
  1110. }
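/*
 * wake up allocators sleeping on the request list once this queue drops
 * back under the allocation limit recorded in alloc_limit
 */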
  1111. static void cfq_check_waiters(request_queue_t *q, struct cfq_queue *cfqq)
  1112. {
  1113. struct request_list *rl = &q->rq;
  1114. const int write = waitqueue_active(&rl->wait[WRITE]);
  1115. const int read = waitqueue_active(&rl->wait[READ]);
  1116. if (read && cfqq->allocated[READ] < cfqq->alloc_limit[READ])
  1117. wake_up(&rl->wait[READ]);
  1118. if (write && cfqq->allocated[WRITE] < cfqq->alloc_limit[WRITE])
  1119. wake_up(&rl->wait[WRITE]);
  1120. }
  1121. /*
  1122. * queue lock held here
  1123. */
  1124. static void cfq_put_request(request_queue_t *q, struct request *rq)
  1125. {
  1126. struct cfq_data *cfqd = q->elevator->elevator_data;
  1127. struct cfq_rq *crq = RQ_DATA(rq);
  1128. if (crq) {
  1129. struct cfq_queue *cfqq = crq->cfq_queue;
  1130. BUG_ON(q->last_merge == rq);
  1131. BUG_ON(!hlist_unhashed(&crq->hash));
  1132. if (crq->io_context)
  1133. put_io_context(crq->io_context->ioc);
  1134. BUG_ON(!cfqq->allocated[crq->is_write]);
  1135. cfqq->allocated[crq->is_write]--;
  1136. mempool_free(crq, cfqd->crq_pool);
  1137. rq->elevator_private = NULL;
  1138. smp_mb();
  1139. cfq_check_waiters(q, cfqq);
  1140. cfq_put_queue(cfqq);
  1141. }
  1142. }
  1143. /*
  1144. * Allocate cfq data structures associated with this request: a cfq_queue and a cfq_rq.
  1145. */
  1146. static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
  1147. {
  1148. struct cfq_data *cfqd = q->elevator->elevator_data;
  1149. struct cfq_io_context *cic;
  1150. const int rw = rq_data_dir(rq);
  1151. struct cfq_queue *cfqq, *saved_cfqq;
  1152. struct cfq_rq *crq;
  1153. unsigned long flags;
  1154. might_sleep_if(gfp_mask & __GFP_WAIT);
  1155. spin_lock_irqsave(q->queue_lock, flags);
  1156. cfqq = __cfq_get_queue(cfqd, cfq_hash_key(cfqd, current), gfp_mask);
  1157. if (!cfqq)
  1158. goto out_lock;
  1159. repeat:
  1160. if (cfqq->allocated[rw] >= cfqd->max_queued)
  1161. goto out_lock;
  1162. cfqq->allocated[rw]++;
  1163. spin_unlock_irqrestore(q->queue_lock, flags);
  1164. /*
  1165. * if hashing type has changed, the cfq_queue might change here.
  1166. */
  1167. saved_cfqq = cfqq;
  1168. cic = cfq_get_io_context(&cfqq, gfp_mask);
  1169. if (!cic)
  1170. goto err;
  1171. /*
  1172. * repeat allocation checks on queue change
  1173. */
  1174. if (unlikely(saved_cfqq != cfqq)) {
  1175. spin_lock_irqsave(q->queue_lock, flags);
  1176. saved_cfqq->allocated[rw]--;
  1177. goto repeat;
  1178. }
  1179. crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
  1180. if (crq) {
  1181. RB_CLEAR(&crq->rb_node);
  1182. crq->rb_key = 0;
  1183. crq->request = rq;
  1184. INIT_HLIST_NODE(&crq->hash);
  1185. crq->cfq_queue = cfqq;
  1186. crq->io_context = cic;
  1187. crq->service_start = crq->queue_start = 0;
  1188. crq->in_flight = crq->accounted = crq->is_sync = 0;
  1189. crq->is_write = rw;
  1190. rq->elevator_private = crq;
  1191. cfqq->alloc_limit[rw] = 0;
  1192. return 0;
  1193. }
  1194. put_io_context(cic->ioc);
  1195. err:
  1196. spin_lock_irqsave(q->queue_lock, flags);
  1197. cfqq->allocated[rw]--;
  1198. cfq_put_queue(cfqq);
  1199. out_lock:
  1200. spin_unlock_irqrestore(q->queue_lock, flags);
  1201. return 1;
  1202. }
  1203. static void cfq_put_cfqd(struct cfq_data *cfqd)
  1204. {
  1205. request_queue_t *q = cfqd->queue;
  1206. if (!atomic_dec_and_test(&cfqd->ref))
  1207. return;
  1208. blk_put_queue(q);
  1209. mempool_destroy(cfqd->crq_pool);
  1210. kfree(cfqd->crq_hash);
  1211. kfree(cfqd->cfq_hash);
  1212. kfree(cfqd);
  1213. }
  1214. static void cfq_exit_queue(elevator_t *e)
  1215. {
  1216. cfq_put_cfqd(e->elevator_data);
  1217. }
  1218. static int cfq_init_queue(request_queue_t *q, elevator_t *e)
  1219. {
  1220. struct cfq_data *cfqd;
  1221. int i;
  1222. cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
  1223. if (!cfqd)
  1224. return -ENOMEM;
  1225. memset(cfqd, 0, sizeof(*cfqd));
  1226. INIT_LIST_HEAD(&cfqd->rr_list);
  1227. INIT_LIST_HEAD(&cfqd->empty_list);
  1228. cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
  1229. if (!cfqd->crq_hash)
  1230. goto out_crqhash;
  1231. cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
  1232. if (!cfqd->cfq_hash)
  1233. goto out_cfqhash;
  1234. cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool);
  1235. if (!cfqd->crq_pool)
  1236. goto out_crqpool;
  1237. for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
  1238. INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
  1239. for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
  1240. INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
  1241. e->elevator_data = cfqd;
  1242. cfqd->queue = q;
  1243. atomic_inc(&q->refcnt);
  1244. /*
  1245. * just set it to some high value, we want anyone to be able to queue
  1246. * some requests. fairness is handled differently
  1247. */
  1248. q->nr_requests = 1024;
  1249. cfqd->max_queued = q->nr_requests / 16;
  1250. q->nr_batching = cfq_queued;
  1251. cfqd->key_type = CFQ_KEY_TGID;
  1252. cfqd->find_best_crq = 1;
  1253. atomic_set(&cfqd->ref, 1);
  1254. cfqd->cfq_queued = cfq_queued;
  1255. cfqd->cfq_quantum = cfq_quantum;
  1256. cfqd->cfq_fifo_expire_r = cfq_fifo_expire_r;
  1257. cfqd->cfq_fifo_expire_w = cfq_fifo_expire_w;
  1258. cfqd->cfq_fifo_batch_expire = cfq_fifo_rate;
  1259. cfqd->cfq_back_max = cfq_back_max;
  1260. cfqd->cfq_back_penalty = cfq_back_penalty;
  1261. return 0;
  1262. out_crqpool:
  1263. kfree(cfqd->cfq_hash);
  1264. out_cfqhash:
  1265. kfree(cfqd->crq_hash);
  1266. out_crqhash:
  1267. kfree(cfqd);
  1268. return -ENOMEM;
  1269. }
  1270. static void cfq_slab_kill(void)
  1271. {
  1272. if (crq_pool)
  1273. kmem_cache_destroy(crq_pool);
  1274. if (cfq_pool)
  1275. kmem_cache_destroy(cfq_pool);
  1276. if (cfq_ioc_pool)
  1277. kmem_cache_destroy(cfq_ioc_pool);
  1278. }
  1279. static int __init cfq_slab_setup(void)
  1280. {
  1281. crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
  1282. NULL, NULL);
  1283. if (!crq_pool)
  1284. goto fail;
  1285. cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
  1286. NULL, NULL);
  1287. if (!cfq_pool)
  1288. goto fail;
  1289. cfq_ioc_pool = kmem_cache_create("cfq_ioc_pool",
  1290. sizeof(struct cfq_io_context), 0, 0, NULL, NULL);
  1291. if (!cfq_ioc_pool)
  1292. goto fail;
  1293. return 0;
  1294. fail:
  1295. cfq_slab_kill();
  1296. return -ENOMEM;
  1297. }
  1298. /*
  1299. * sysfs parts below -->
  1300. */
  1301. struct cfq_fs_entry {
  1302. struct attribute attr;
  1303. ssize_t (*show)(struct cfq_data *, char *);
  1304. ssize_t (*store)(struct cfq_data *, const char *, size_t);
  1305. };
  1306. static ssize_t
  1307. cfq_var_show(unsigned int var, char *page)
  1308. {
  1309. return sprintf(page, "%d\n", var);
  1310. }
  1311. static ssize_t
  1312. cfq_var_store(unsigned int *var, const char *page, size_t count)
  1313. {
  1314. char *p = (char *) page;
  1315. *var = simple_strtoul(p, &p, 10);
  1316. return count;
  1317. }
  1318. static ssize_t
  1319. cfq_clear_elapsed(struct cfq_data *cfqd, const char *page, size_t count)
  1320. {
  1321. max_elapsed_dispatch = max_elapsed_crq = 0;
  1322. return count;
  1323. }
  1324. static ssize_t
  1325. cfq_set_key_type(struct cfq_data *cfqd, const char *page, size_t count)
  1326. {
  1327. spin_lock_irq(cfqd->queue->queue_lock);
  1328. if (!strncmp(page, "pgid", 4))
  1329. cfqd->key_type = CFQ_KEY_PGID;
  1330. else if (!strncmp(page, "tgid", 4))
  1331. cfqd->key_type = CFQ_KEY_TGID;
  1332. else if (!strncmp(page, "uid", 3))
  1333. cfqd->key_type = CFQ_KEY_UID;
  1334. else if (!strncmp(page, "gid", 3))
  1335. cfqd->key_type = CFQ_KEY_GID;
  1336. spin_unlock_irq(cfqd->queue->queue_lock);
  1337. return count;
  1338. }
  1339. static ssize_t
  1340. cfq_read_key_type(struct cfq_data *cfqd, char *page)
  1341. {
  1342. ssize_t len = 0;
  1343. int i;
  1344. for (i = CFQ_KEY_PGID; i < CFQ_KEY_LAST; i++) {
  1345. if (cfqd->key_type == i)
  1346. len += sprintf(page+len, "[%s] ", cfq_key_types[i]);
  1347. else
  1348. len += sprintf(page+len, "%s ", cfq_key_types[i]);
  1349. }
  1350. len += sprintf(page+len, "\n");
  1351. return len;
  1352. }
  1353. #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
  1354. static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \
  1355. { \
  1356. unsigned int __data = __VAR; \
  1357. if (__CONV) \
  1358. __data = jiffies_to_msecs(__data); \
  1359. return cfq_var_show(__data, (page)); \
  1360. }
  1361. SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
  1362. SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued, 0);
  1363. SHOW_FUNCTION(cfq_fifo_expire_r_show, cfqd->cfq_fifo_expire_r, 1);
  1364. SHOW_FUNCTION(cfq_fifo_expire_w_show, cfqd->cfq_fifo_expire_w, 1);
  1365. SHOW_FUNCTION(cfq_fifo_batch_expire_show, cfqd->cfq_fifo_batch_expire, 1);
  1366. SHOW_FUNCTION(cfq_find_best_show, cfqd->find_best_crq, 0);
  1367. SHOW_FUNCTION(cfq_back_max_show, cfqd->cfq_back_max, 0);
  1368. SHOW_FUNCTION(cfq_back_penalty_show, cfqd->cfq_back_penalty, 0);
  1369. #undef SHOW_FUNCTION
  1370. #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
  1371. static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count) \
  1372. { \
  1373. unsigned int __data; \
  1374. int ret = cfq_var_store(&__data, (page), count); \
  1375. if (__data < (MIN)) \
  1376. __data = (MIN); \
  1377. else if (__data > (MAX)) \
  1378. __data = (MAX); \
  1379. if (__CONV) \
  1380. *(__PTR) = msecs_to_jiffies(__data); \
  1381. else \
  1382. *(__PTR) = __data; \
  1383. return ret; \
  1384. }
  1385. STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
  1386. STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, UINT_MAX, 0);
  1387. STORE_FUNCTION(cfq_fifo_expire_r_store, &cfqd->cfq_fifo_expire_r, 1, UINT_MAX, 1);
  1388. STORE_FUNCTION(cfq_fifo_expire_w_store, &cfqd->cfq_fifo_expire_w, 1, UINT_MAX, 1);
  1389. STORE_FUNCTION(cfq_fifo_batch_expire_store, &cfqd->cfq_fifo_batch_expire, 0, UINT_MAX, 1);
  1390. STORE_FUNCTION(cfq_find_best_store, &cfqd->find_best_crq, 0, 1, 0);
  1391. STORE_FUNCTION(cfq_back_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
  1392. STORE_FUNCTION(cfq_back_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0);
  1393. #undef STORE_FUNCTION
  1394. static struct cfq_fs_entry cfq_quantum_entry = {
  1395. .attr = {.name = "quantum", .mode = S_IRUGO | S_IWUSR },
  1396. .show = cfq_quantum_show,
  1397. .store = cfq_quantum_store,
  1398. };
  1399. static struct cfq_fs_entry cfq_queued_entry = {
  1400. .attr = {.name = "queued", .mode = S_IRUGO | S_IWUSR },
  1401. .show = cfq_queued_show,
  1402. .store = cfq_queued_store,
  1403. };
  1404. static struct cfq_fs_entry cfq_fifo_expire_r_entry = {
  1405. .attr = {.name = "fifo_expire_sync", .mode = S_IRUGO | S_IWUSR },
  1406. .show = cfq_fifo_expire_r_show,
  1407. .store = cfq_fifo_expire_r_store,
  1408. };
  1409. static struct cfq_fs_entry cfq_fifo_expire_w_entry = {
  1410. .attr = {.name = "fifo_expire_async", .mode = S_IRUGO | S_IWUSR },
  1411. .show = cfq_fifo_expire_w_show,
  1412. .store = cfq_fifo_expire_w_store,
  1413. };
  1414. static struct cfq_fs_entry cfq_fifo_batch_expire_entry = {
  1415. .attr = {.name = "fifo_batch_expire", .mode = S_IRUGO | S_IWUSR },
  1416. .show = cfq_fifo_batch_expire_show,
  1417. .store = cfq_fifo_batch_expire_store,
  1418. };
  1419. static struct cfq_fs_entry cfq_find_best_entry = {
  1420. .attr = {.name = "find_best_crq", .mode = S_IRUGO | S_IWUSR },
  1421. .show = cfq_find_best_show,
  1422. .store = cfq_find_best_store,
  1423. };
  1424. static struct cfq_fs_entry cfq_back_max_entry = {
  1425. .attr = {.name = "back_seek_max", .mode = S_IRUGO | S_IWUSR },
  1426. .show = cfq_back_max_show,
  1427. .store = cfq_back_max_store,
  1428. };
  1429. static struct cfq_fs_entry cfq_back_penalty_entry = {
  1430. .attr = {.name = "back_seek_penalty", .mode = S_IRUGO | S_IWUSR },
  1431. .show = cfq_back_penalty_show,
  1432. .store = cfq_back_penalty_store,
  1433. };
  1434. static struct cfq_fs_entry cfq_clear_elapsed_entry = {
  1435. .attr = {.name = "clear_elapsed", .mode = S_IWUSR },
  1436. .store = cfq_clear_elapsed,
  1437. };
  1438. static struct cfq_fs_entry cfq_key_type_entry = {
  1439. .attr = {.name = "key_type", .mode = S_IRUGO | S_IWUSR },
  1440. .show = cfq_read_key_type,
  1441. .store = cfq_set_key_type,
  1442. };
  1443. static struct attribute *default_attrs[] = {
  1444. &cfq_quantum_entry.attr,
  1445. &cfq_queued_entry.attr,
  1446. &cfq_fifo_expire_r_entry.attr,
  1447. &cfq_fifo_expire_w_entry.attr,
  1448. &cfq_fifo_batch_expire_entry.attr,
  1449. &cfq_key_type_entry.attr,
  1450. &cfq_find_best_entry.attr,
  1451. &cfq_back_max_entry.attr,
  1452. &cfq_back_penalty_entry.attr,
  1453. &cfq_clear_elapsed_entry.attr,
  1454. NULL,
  1455. };
  1456. #define to_cfq(atr) container_of((atr), struct cfq_fs_entry, attr)
  1457. static ssize_t
  1458. cfq_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
  1459. {
  1460. elevator_t *e = container_of(kobj, elevator_t, kobj);
  1461. struct cfq_fs_entry *entry = to_cfq(attr);
  1462. if (!entry->show)
  1463. return -EIO;
  1464. return entry->show(e->elevator_data, page);
  1465. }
  1466. static ssize_t
  1467. cfq_attr_store(struct kobject *kobj, struct attribute *attr,
  1468. const char *page, size_t length)
  1469. {
  1470. elevator_t *e = container_of(kobj, elevator_t, kobj);
  1471. struct cfq_fs_entry *entry = to_cfq(attr);
  1472. if (!entry->store)
  1473. return -EIO;
  1474. return entry->store(e->elevator_data, page, length);
  1475. }
  1476. static struct sysfs_ops cfq_sysfs_ops = {
  1477. .show = cfq_attr_show,
  1478. .store = cfq_attr_store,
  1479. };
  1480. static struct kobj_type cfq_ktype = {
  1481. .sysfs_ops = &cfq_sysfs_ops,
  1482. .default_attrs = default_attrs,
  1483. };
  1484. static struct elevator_type iosched_cfq = {
  1485. .ops = {
  1486. .elevator_merge_fn = cfq_merge,
  1487. .elevator_merged_fn = cfq_merged_request,
  1488. .elevator_merge_req_fn = cfq_merged_requests,
  1489. .elevator_next_req_fn = cfq_next_request,
  1490. .elevator_add_req_fn = cfq_insert_request,
  1491. .elevator_remove_req_fn = cfq_remove_request,
  1492. .elevator_requeue_req_fn = cfq_requeue_request,
  1493. .elevator_deactivate_req_fn = cfq_deactivate_request,
  1494. .elevator_queue_empty_fn = cfq_queue_empty,
  1495. .elevator_completed_req_fn = cfq_completed_request,
  1496. .elevator_former_req_fn = cfq_former_request,
  1497. .elevator_latter_req_fn = cfq_latter_request,
  1498. .elevator_set_req_fn = cfq_set_request,
  1499. .elevator_put_req_fn = cfq_put_request,
  1500. .elevator_may_queue_fn = cfq_may_queue,
  1501. .elevator_init_fn = cfq_init_queue,
  1502. .elevator_exit_fn = cfq_exit_queue,
  1503. },
  1504. .elevator_ktype = &cfq_ktype,
  1505. .elevator_name = "cfq",
  1506. .elevator_owner = THIS_MODULE,
  1507. };
  1508. static int __init cfq_init(void)
  1509. {
  1510. int ret;
  1511. if (cfq_slab_setup())
  1512. return -ENOMEM;
  1513. ret = elv_register(&iosched_cfq);
  1514. if (!ret) {
  1515. __module_get(THIS_MODULE);
  1516. return 0;
  1517. }
  1518. cfq_slab_kill();
  1519. return ret;
  1520. }
  1521. static void __exit cfq_exit(void)
  1522. {
  1523. cfq_slab_kill();
  1524. elv_unregister(&iosched_cfq);
  1525. }
  1526. module_init(cfq_init);
  1527. module_exit(cfq_exit);
  1528. MODULE_AUTHOR("Jens Axboe");
  1529. MODULE_LICENSE("GPL");
  1530. MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");