sched_rt.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932
  1. /*
  2. * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
  3. * policies)
  4. */
  5. #ifdef CONFIG_SMP
  6. static inline int rt_overloaded(struct rq *rq)
  7. {
  8. return atomic_read(&rq->rd->rto_count);
  9. }
  10. static inline void rt_set_overload(struct rq *rq)
  11. {
  12. cpu_set(rq->cpu, rq->rd->rto_mask);
  13. /*
  14. * Make sure the mask is visible before we set
  15. * the overload count. That is checked to determine
  16. * if we should look at the mask. It would be a shame
  17. * if we looked at the mask, but the mask was not
  18. * updated yet.
  19. */
  20. wmb();
  21. atomic_inc(&rq->rd->rto_count);
  22. }
  23. static inline void rt_clear_overload(struct rq *rq)
  24. {
  25. /* the order here really doesn't matter */
  26. atomic_dec(&rq->rd->rto_count);
  27. cpu_clear(rq->cpu, rq->rd->rto_mask);
  28. }
  29. static void update_rt_migration(struct rq *rq)
  30. {
  31. if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1)) {
  32. if (!rq->rt.overloaded) {
  33. rt_set_overload(rq);
  34. rq->rt.overloaded = 1;
  35. }
  36. } else if (rq->rt.overloaded) {
  37. rt_clear_overload(rq);
  38. rq->rt.overloaded = 0;
  39. }
  40. }
  41. #endif /* CONFIG_SMP */
  42. /*
  43. * Update the current task's runtime statistics. Skip current tasks that
  44. * are not in our scheduling class.
  45. */
  46. static void update_curr_rt(struct rq *rq)
  47. {
  48. struct task_struct *curr = rq->curr;
  49. u64 delta_exec;
  50. if (!task_has_rt_policy(curr))
  51. return;
  52. delta_exec = rq->clock - curr->se.exec_start;
  53. if (unlikely((s64)delta_exec < 0))
  54. delta_exec = 0;
  55. schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
  56. curr->se.sum_exec_runtime += delta_exec;
  57. curr->se.exec_start = rq->clock;
  58. cpuacct_charge(curr, delta_exec);
  59. }
  60. static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
  61. {
  62. WARN_ON(!rt_task(p));
  63. rq->rt.rt_nr_running++;
  64. #ifdef CONFIG_SMP
  65. if (p->prio < rq->rt.highest_prio)
  66. rq->rt.highest_prio = p->prio;
  67. if (p->nr_cpus_allowed > 1)
  68. rq->rt.rt_nr_migratory++;
  69. update_rt_migration(rq);
  70. #endif /* CONFIG_SMP */
  71. }
  72. static inline void dec_rt_tasks(struct task_struct *p, struct rq *rq)
  73. {
  74. WARN_ON(!rt_task(p));
  75. WARN_ON(!rq->rt.rt_nr_running);
  76. rq->rt.rt_nr_running--;
  77. #ifdef CONFIG_SMP
  78. if (rq->rt.rt_nr_running) {
  79. struct rt_prio_array *array;
  80. WARN_ON(p->prio < rq->rt.highest_prio);
  81. if (p->prio == rq->rt.highest_prio) {
  82. /* recalculate */
  83. array = &rq->rt.active;
  84. rq->rt.highest_prio =
  85. sched_find_first_bit(array->bitmap);
  86. } /* otherwise leave rq->highest prio alone */
  87. } else
  88. rq->rt.highest_prio = MAX_RT_PRIO;
  89. if (p->nr_cpus_allowed > 1)
  90. rq->rt.rt_nr_migratory--;
  91. update_rt_migration(rq);
  92. #endif /* CONFIG_SMP */
  93. }
  94. static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
  95. {
  96. struct rt_prio_array *array = &rq->rt.active;
  97. list_add_tail(&p->rt.run_list, array->queue + p->prio);
  98. __set_bit(p->prio, array->bitmap);
  99. inc_cpu_load(rq, p->se.load.weight);
  100. inc_rt_tasks(p, rq);
  101. if (wakeup)
  102. p->rt.timeout = 0;
  103. }
  104. /*
  105. * Adding/removing a task to/from a priority array:
  106. */
  107. static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
  108. {
  109. struct rt_prio_array *array = &rq->rt.active;
  110. update_curr_rt(rq);
  111. list_del(&p->rt.run_list);
  112. if (list_empty(array->queue + p->prio))
  113. __clear_bit(p->prio, array->bitmap);
  114. dec_cpu_load(rq, p->se.load.weight);
  115. dec_rt_tasks(p, rq);
  116. }
  117. /*
  118. * Put task to the end of the run list without the overhead of dequeue
  119. * followed by enqueue.
  120. */
  121. static void requeue_task_rt(struct rq *rq, struct task_struct *p)
  122. {
  123. struct rt_prio_array *array = &rq->rt.active;
  124. list_move_tail(&p->rt.run_list, array->queue + p->prio);
  125. }
  126. static void
  127. yield_task_rt(struct rq *rq)
  128. {
  129. requeue_task_rt(rq, rq->curr);
  130. }
  131. #ifdef CONFIG_SMP
  132. static int find_lowest_rq(struct task_struct *task);
  133. static int select_task_rq_rt(struct task_struct *p, int sync)
  134. {
  135. struct rq *rq = task_rq(p);
  136. /*
  137. * If the current task is an RT task, then
  138. * try to see if we can wake this RT task up on another
  139. * runqueue. Otherwise simply start this RT task
  140. * on its current runqueue.
  141. *
  142. * We want to avoid overloading runqueues. Even if
  143. * the RT task is of higher priority than the current RT task.
  144. * RT tasks behave differently than other tasks. If
  145. * one gets preempted, we try to push it off to another queue.
  146. * So trying to keep a preempting RT task on the same
  147. * cache hot CPU will force the running RT task to
  148. * a cold CPU. So we waste all the cache for the lower
  149. * RT task in hopes of saving some of a RT task
  150. * that is just being woken and probably will have
  151. * cold cache anyway.
  152. */
  153. if (unlikely(rt_task(rq->curr)) &&
  154. (p->nr_cpus_allowed > 1)) {
  155. int cpu = find_lowest_rq(p);
  156. return (cpu == -1) ? task_cpu(p) : cpu;
  157. }
  158. /*
  159. * Otherwise, just let it ride on the affined RQ and the
  160. * post-schedule router will push the preempted task away
  161. */
  162. return task_cpu(p);
  163. }
  164. #endif /* CONFIG_SMP */
  165. /*
  166. * Preempt the current task with a newly woken task if needed:
  167. */
  168. static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
  169. {
  170. if (p->prio < rq->curr->prio)
  171. resched_task(rq->curr);
  172. }
  173. static struct task_struct *pick_next_task_rt(struct rq *rq)
  174. {
  175. struct rt_prio_array *array = &rq->rt.active;
  176. struct task_struct *next;
  177. struct list_head *queue;
  178. int idx;
  179. idx = sched_find_first_bit(array->bitmap);
  180. if (idx >= MAX_RT_PRIO)
  181. return NULL;
  182. queue = array->queue + idx;
  183. next = list_entry(queue->next, struct task_struct, rt.run_list);
  184. next->se.exec_start = rq->clock;
  185. return next;
  186. }
  187. static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
  188. {
  189. update_curr_rt(rq);
  190. p->se.exec_start = 0;
  191. }
  192. #ifdef CONFIG_SMP
  193. /* Only try algorithms three times */
  194. #define RT_MAX_TRIES 3
  195. static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
  196. static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
  197. static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
  198. {
  199. if (!task_running(rq, p) &&
  200. (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
  201. (p->nr_cpus_allowed > 1))
  202. return 1;
  203. return 0;
  204. }
  205. /* Return the second highest RT task, NULL otherwise */
  206. static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
  207. {
  208. struct rt_prio_array *array = &rq->rt.active;
  209. struct task_struct *next;
  210. struct list_head *queue;
  211. int idx;
  212. if (likely(rq->rt.rt_nr_running < 2))
  213. return NULL;
  214. idx = sched_find_first_bit(array->bitmap);
  215. if (unlikely(idx >= MAX_RT_PRIO)) {
  216. WARN_ON(1); /* rt_nr_running is bad */
  217. return NULL;
  218. }
  219. queue = array->queue + idx;
  220. BUG_ON(list_empty(queue));
  221. next = list_entry(queue->next, struct task_struct, rt.run_list);
  222. if (unlikely(pick_rt_task(rq, next, cpu)))
  223. goto out;
  224. if (queue->next->next != queue) {
  225. /* same prio task */
  226. next = list_entry(queue->next->next, struct task_struct,
  227. rt.run_list);
  228. if (pick_rt_task(rq, next, cpu))
  229. goto out;
  230. }
  231. retry:
  232. /* slower, but more flexible */
  233. idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
  234. if (unlikely(idx >= MAX_RT_PRIO))
  235. return NULL;
  236. queue = array->queue + idx;
  237. BUG_ON(list_empty(queue));
  238. list_for_each_entry(next, queue, rt.run_list) {
  239. if (pick_rt_task(rq, next, cpu))
  240. goto out;
  241. }
  242. goto retry;
  243. out:
  244. return next;
  245. }
  246. static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
  247. static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
  248. {
  249. int lowest_prio = -1;
  250. int lowest_cpu = -1;
  251. int count = 0;
  252. int cpu;
  253. cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
  254. /*
  255. * Scan each rq for the lowest prio.
  256. */
  257. for_each_cpu_mask(cpu, *lowest_mask) {
  258. struct rq *rq = cpu_rq(cpu);
  259. /* We look for lowest RT prio or non-rt CPU */
  260. if (rq->rt.highest_prio >= MAX_RT_PRIO) {
  261. /*
  262. * if we already found a low RT queue
  263. * and now we found this non-rt queue
  264. * clear the mask and set our bit.
  265. * Otherwise just return the queue as is
  266. * and the count==1 will cause the algorithm
  267. * to use the first bit found.
  268. */
  269. if (lowest_cpu != -1) {
  270. cpus_clear(*lowest_mask);
  271. cpu_set(rq->cpu, *lowest_mask);
  272. }
  273. return 1;
  274. }
  275. /* no locking for now */
  276. if ((rq->rt.highest_prio > task->prio)
  277. && (rq->rt.highest_prio >= lowest_prio)) {
  278. if (rq->rt.highest_prio > lowest_prio) {
  279. /* new low - clear old data */
  280. lowest_prio = rq->rt.highest_prio;
  281. lowest_cpu = cpu;
  282. count = 0;
  283. }
  284. count++;
  285. } else
  286. cpu_clear(cpu, *lowest_mask);
  287. }
  288. /*
  289. * Clear out all the set bits that represent
  290. * runqueues that were of higher prio than
  291. * the lowest_prio.
  292. */
  293. if (lowest_cpu > 0) {
  294. /*
  295. * Perhaps we could add another cpumask op to
  296. * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
  297. * Then that could be optimized to use memset and such.
  298. */
  299. for_each_cpu_mask(cpu, *lowest_mask) {
  300. if (cpu >= lowest_cpu)
  301. break;
  302. cpu_clear(cpu, *lowest_mask);
  303. }
  304. }
  305. return count;
  306. }
  307. static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
  308. {
  309. int first;
  310. /* "this_cpu" is cheaper to preempt than a remote processor */
  311. if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
  312. return this_cpu;
  313. first = first_cpu(*mask);
  314. if (first != NR_CPUS)
  315. return first;
  316. return -1;
  317. }
  318. static int find_lowest_rq(struct task_struct *task)
  319. {
  320. struct sched_domain *sd;
  321. cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
  322. int this_cpu = smp_processor_id();
  323. int cpu = task_cpu(task);
  324. int count = find_lowest_cpus(task, lowest_mask);
  325. if (!count)
  326. return -1; /* No targets found */
  327. /*
  328. * There is no sense in performing an optimal search if only one
  329. * target is found.
  330. */
  331. if (count == 1)
  332. return first_cpu(*lowest_mask);
  333. /*
  334. * At this point we have built a mask of cpus representing the
  335. * lowest priority tasks in the system. Now we want to elect
  336. * the best one based on our affinity and topology.
  337. *
  338. * We prioritize the last cpu that the task executed on since
  339. * it is most likely cache-hot in that location.
  340. */
  341. if (cpu_isset(cpu, *lowest_mask))
  342. return cpu;
  343. /*
  344. * Otherwise, we consult the sched_domains span maps to figure
  345. * out which cpu is logically closest to our hot cache data.
  346. */
  347. if (this_cpu == cpu)
  348. this_cpu = -1; /* Skip this_cpu opt if the same */
  349. for_each_domain(cpu, sd) {
  350. if (sd->flags & SD_WAKE_AFFINE) {
  351. cpumask_t domain_mask;
  352. int best_cpu;
  353. cpus_and(domain_mask, sd->span, *lowest_mask);
  354. best_cpu = pick_optimal_cpu(this_cpu,
  355. &domain_mask);
  356. if (best_cpu != -1)
  357. return best_cpu;
  358. }
  359. }
  360. /*
  361. * And finally, if there were no matches within the domains
  362. * just give the caller *something* to work with from the compatible
  363. * locations.
  364. */
  365. return pick_optimal_cpu(this_cpu, lowest_mask);
  366. }
  367. /* Will lock the rq it finds */
  368. static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
  369. {
  370. struct rq *lowest_rq = NULL;
  371. int tries;
  372. int cpu;
  373. for (tries = 0; tries < RT_MAX_TRIES; tries++) {
  374. cpu = find_lowest_rq(task);
  375. if ((cpu == -1) || (cpu == rq->cpu))
  376. break;
  377. lowest_rq = cpu_rq(cpu);
  378. /* if the prio of this runqueue changed, try again */
  379. if (double_lock_balance(rq, lowest_rq)) {
  380. /*
  381. * We had to unlock the run queue. In
  382. * the mean time, task could have
  383. * migrated already or had its affinity changed.
  384. * Also make sure that it wasn't scheduled on its rq.
  385. */
  386. if (unlikely(task_rq(task) != rq ||
  387. !cpu_isset(lowest_rq->cpu,
  388. task->cpus_allowed) ||
  389. task_running(rq, task) ||
  390. !task->se.on_rq)) {
  391. spin_unlock(&lowest_rq->lock);
  392. lowest_rq = NULL;
  393. break;
  394. }
  395. }
  396. /* If this rq is still suitable use it. */
  397. if (lowest_rq->rt.highest_prio > task->prio)
  398. break;
  399. /* try again */
  400. spin_unlock(&lowest_rq->lock);
  401. lowest_rq = NULL;
  402. }
  403. return lowest_rq;
  404. }
  405. /*
  406. * If the current CPU has more than one RT task, see if the non
  407. * running task can migrate over to a CPU that is running a task
  408. * of lesser priority.
  409. */
  410. static int push_rt_task(struct rq *rq)
  411. {
  412. struct task_struct *next_task;
  413. struct rq *lowest_rq;
  414. int ret = 0;
  415. int paranoid = RT_MAX_TRIES;
  416. if (!rq->rt.overloaded)
  417. return 0;
  418. next_task = pick_next_highest_task_rt(rq, -1);
  419. if (!next_task)
  420. return 0;
  421. retry:
  422. if (unlikely(next_task == rq->curr)) {
  423. WARN_ON(1);
  424. return 0;
  425. }
  426. /*
  427. * It's possible that the next_task slipped in of
  428. * higher priority than current. If that's the case
  429. * just reschedule current.
  430. */
  431. if (unlikely(next_task->prio < rq->curr->prio)) {
  432. resched_task(rq->curr);
  433. return 0;
  434. }
  435. /* We might release rq lock */
  436. get_task_struct(next_task);
  437. /* find_lock_lowest_rq locks the rq if found */
  438. lowest_rq = find_lock_lowest_rq(next_task, rq);
  439. if (!lowest_rq) {
  440. struct task_struct *task;
  441. /*
  442. * find lock_lowest_rq releases rq->lock
  443. * so it is possible that next_task has changed.
  444. * If it has, then try again.
  445. */
  446. task = pick_next_highest_task_rt(rq, -1);
  447. if (unlikely(task != next_task) && task && paranoid--) {
  448. put_task_struct(next_task);
  449. next_task = task;
  450. goto retry;
  451. }
  452. goto out;
  453. }
  454. deactivate_task(rq, next_task, 0);
  455. set_task_cpu(next_task, lowest_rq->cpu);
  456. activate_task(lowest_rq, next_task, 0);
  457. resched_task(lowest_rq->curr);
  458. spin_unlock(&lowest_rq->lock);
  459. ret = 1;
  460. out:
  461. put_task_struct(next_task);
  462. return ret;
  463. }
  464. /*
  465. * TODO: Currently we just use the second highest prio task on
  466. * the queue, and stop when it can't migrate (or there's
  467. * no more RT tasks). There may be a case where a lower
  468. * priority RT task has a different affinity than the
  469. * higher RT task. In this case the lower RT task could
  470. * possibly be able to migrate where as the higher priority
  471. * RT task could not. We currently ignore this issue.
  472. * Enhancements are welcome!
  473. */
  474. static void push_rt_tasks(struct rq *rq)
  475. {
  476. /* push_rt_task will return true if it moved an RT */
  477. while (push_rt_task(rq))
  478. ;
  479. }
  480. static int pull_rt_task(struct rq *this_rq)
  481. {
  482. int this_cpu = this_rq->cpu, ret = 0, cpu;
  483. struct task_struct *p, *next;
  484. struct rq *src_rq;
  485. if (likely(!rt_overloaded(this_rq)))
  486. return 0;
  487. next = pick_next_task_rt(this_rq);
  488. for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
  489. if (this_cpu == cpu)
  490. continue;
  491. src_rq = cpu_rq(cpu);
  492. /*
  493. * We can potentially drop this_rq's lock in
  494. * double_lock_balance, and another CPU could
  495. * steal our next task - hence we must cause
  496. * the caller to recalculate the next task
  497. * in that case:
  498. */
  499. if (double_lock_balance(this_rq, src_rq)) {
  500. struct task_struct *old_next = next;
  501. next = pick_next_task_rt(this_rq);
  502. if (next != old_next)
  503. ret = 1;
  504. }
  505. /*
  506. * Are there still pullable RT tasks?
  507. */
  508. if (src_rq->rt.rt_nr_running <= 1) {
  509. spin_unlock(&src_rq->lock);
  510. continue;
  511. }
  512. p = pick_next_highest_task_rt(src_rq, this_cpu);
  513. /*
  514. * Do we have an RT task that preempts
  515. * the to-be-scheduled task?
  516. */
  517. if (p && (!next || (p->prio < next->prio))) {
  518. WARN_ON(p == src_rq->curr);
  519. WARN_ON(!p->se.on_rq);
  520. /*
  521. * There's a chance that p is higher in priority
  522. * than what's currently running on its cpu.
  523. * This is just that p is wakeing up and hasn't
  524. * had a chance to schedule. We only pull
  525. * p if it is lower in priority than the
  526. * current task on the run queue or
  527. * this_rq next task is lower in prio than
  528. * the current task on that rq.
  529. */
  530. if (p->prio < src_rq->curr->prio ||
  531. (next && next->prio < src_rq->curr->prio))
  532. goto out;
  533. ret = 1;
  534. deactivate_task(src_rq, p, 0);
  535. set_task_cpu(p, this_cpu);
  536. activate_task(this_rq, p, 0);
  537. /*
  538. * We continue with the search, just in
  539. * case there's an even higher prio task
  540. * in another runqueue. (low likelyhood
  541. * but possible)
  542. *
  543. * Update next so that we won't pick a task
  544. * on another cpu with a priority lower (or equal)
  545. * than the one we just picked.
  546. */
  547. next = p;
  548. }
  549. out:
  550. spin_unlock(&src_rq->lock);
  551. }
  552. return ret;
  553. }
  554. static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
  555. {
  556. /* Try to pull RT tasks here if we lower this rq's prio */
  557. if (unlikely(rt_task(prev)) && rq->rt.highest_prio > prev->prio)
  558. pull_rt_task(rq);
  559. }
  560. static void post_schedule_rt(struct rq *rq)
  561. {
  562. /*
  563. * If we have more than one rt_task queued, then
  564. * see if we can push the other rt_tasks off to other CPUS.
  565. * Note we may release the rq lock, and since
  566. * the lock was owned by prev, we need to release it
  567. * first via finish_lock_switch and then reaquire it here.
  568. */
  569. if (unlikely(rq->rt.overloaded)) {
  570. spin_lock_irq(&rq->lock);
  571. push_rt_tasks(rq);
  572. spin_unlock_irq(&rq->lock);
  573. }
  574. }
  575. static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
  576. {
  577. if (!task_running(rq, p) &&
  578. (p->prio >= rq->rt.highest_prio) &&
  579. rq->rt.overloaded)
  580. push_rt_tasks(rq);
  581. }
  582. static unsigned long
  583. load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
  584. unsigned long max_load_move,
  585. struct sched_domain *sd, enum cpu_idle_type idle,
  586. int *all_pinned, int *this_best_prio)
  587. {
  588. /* don't touch RT tasks */
  589. return 0;
  590. }
  591. static int
  592. move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
  593. struct sched_domain *sd, enum cpu_idle_type idle)
  594. {
  595. /* don't touch RT tasks */
  596. return 0;
  597. }
  598. static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask)
  599. {
  600. int weight = cpus_weight(*new_mask);
  601. BUG_ON(!rt_task(p));
  602. /*
  603. * Update the migration status of the RQ if we have an RT task
  604. * which is running AND changing its weight value.
  605. */
  606. if (p->se.on_rq && (weight != p->nr_cpus_allowed)) {
  607. struct rq *rq = task_rq(p);
  608. if ((p->nr_cpus_allowed <= 1) && (weight > 1)) {
  609. rq->rt.rt_nr_migratory++;
  610. } else if ((p->nr_cpus_allowed > 1) && (weight <= 1)) {
  611. BUG_ON(!rq->rt.rt_nr_migratory);
  612. rq->rt.rt_nr_migratory--;
  613. }
  614. update_rt_migration(rq);
  615. }
  616. p->cpus_allowed = *new_mask;
  617. p->nr_cpus_allowed = weight;
  618. }
  619. /* Assumes rq->lock is held */
  620. static void join_domain_rt(struct rq *rq)
  621. {
  622. if (rq->rt.overloaded)
  623. rt_set_overload(rq);
  624. }
  625. /* Assumes rq->lock is held */
  626. static void leave_domain_rt(struct rq *rq)
  627. {
  628. if (rq->rt.overloaded)
  629. rt_clear_overload(rq);
  630. }
  631. /*
  632. * When switch from the rt queue, we bring ourselves to a position
  633. * that we might want to pull RT tasks from other runqueues.
  634. */
  635. static void switched_from_rt(struct rq *rq, struct task_struct *p,
  636. int running)
  637. {
  638. /*
  639. * If there are other RT tasks then we will reschedule
  640. * and the scheduling of the other RT tasks will handle
  641. * the balancing. But if we are the last RT task
  642. * we may need to handle the pulling of RT tasks
  643. * now.
  644. */
  645. if (!rq->rt.rt_nr_running)
  646. pull_rt_task(rq);
  647. }
  648. #endif /* CONFIG_SMP */
  649. /*
  650. * When switching a task to RT, we may overload the runqueue
  651. * with RT tasks. In this case we try to push them off to
  652. * other runqueues.
  653. */
  654. static void switched_to_rt(struct rq *rq, struct task_struct *p,
  655. int running)
  656. {
  657. int check_resched = 1;
  658. /*
  659. * If we are already running, then there's nothing
  660. * that needs to be done. But if we are not running
  661. * we may need to preempt the current running task.
  662. * If that current running task is also an RT task
  663. * then see if we can move to another run queue.
  664. */
  665. if (!running) {
  666. #ifdef CONFIG_SMP
  667. if (rq->rt.overloaded && push_rt_task(rq) &&
  668. /* Don't resched if we changed runqueues */
  669. rq != task_rq(p))
  670. check_resched = 0;
  671. #endif /* CONFIG_SMP */
  672. if (check_resched && p->prio < rq->curr->prio)
  673. resched_task(rq->curr);
  674. }
  675. }
  676. /*
  677. * Priority of the task has changed. This may cause
  678. * us to initiate a push or pull.
  679. */
  680. static void prio_changed_rt(struct rq *rq, struct task_struct *p,
  681. int oldprio, int running)
  682. {
  683. if (running) {
  684. #ifdef CONFIG_SMP
  685. /*
  686. * If our priority decreases while running, we
  687. * may need to pull tasks to this runqueue.
  688. */
  689. if (oldprio < p->prio)
  690. pull_rt_task(rq);
  691. /*
  692. * If there's a higher priority task waiting to run
  693. * then reschedule.
  694. */
  695. if (p->prio > rq->rt.highest_prio)
  696. resched_task(p);
  697. #else
  698. /* For UP simply resched on drop of prio */
  699. if (oldprio < p->prio)
  700. resched_task(p);
  701. #endif /* CONFIG_SMP */
  702. } else {
  703. /*
  704. * This task is not running, but if it is
  705. * greater than the current running task
  706. * then reschedule.
  707. */
  708. if (p->prio < rq->curr->prio)
  709. resched_task(rq->curr);
  710. }
  711. }
  712. static void watchdog(struct rq *rq, struct task_struct *p)
  713. {
  714. unsigned long soft, hard;
  715. if (!p->signal)
  716. return;
  717. soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur;
  718. hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max;
  719. if (soft != RLIM_INFINITY) {
  720. unsigned long next;
  721. p->rt.timeout++;
  722. next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
  723. if (next > p->rt.timeout) {
  724. u64 next_time = p->se.sum_exec_runtime;
  725. next_time += next * (NSEC_PER_SEC/HZ);
  726. if (p->it_sched_expires > next_time)
  727. p->it_sched_expires = next_time;
  728. } else
  729. p->it_sched_expires = p->se.sum_exec_runtime;
  730. }
  731. }
  732. static void task_tick_rt(struct rq *rq, struct task_struct *p)
  733. {
  734. update_curr_rt(rq);
  735. watchdog(rq, p);
  736. /*
  737. * RR tasks need a special form of timeslice management.
  738. * FIFO tasks have no timeslices.
  739. */
  740. if (p->policy != SCHED_RR)
  741. return;
  742. if (--p->rt.time_slice)
  743. return;
  744. p->rt.time_slice = DEF_TIMESLICE;
  745. /*
  746. * Requeue to the end of queue if we are not the only element
  747. * on the queue:
  748. */
  749. if (p->rt.run_list.prev != p->rt.run_list.next) {
  750. requeue_task_rt(rq, p);
  751. set_tsk_need_resched(p);
  752. }
  753. }
  754. static void set_curr_task_rt(struct rq *rq)
  755. {
  756. struct task_struct *p = rq->curr;
  757. p->se.exec_start = rq->clock;
  758. }
  759. const struct sched_class rt_sched_class = {
  760. .next = &fair_sched_class,
  761. .enqueue_task = enqueue_task_rt,
  762. .dequeue_task = dequeue_task_rt,
  763. .yield_task = yield_task_rt,
  764. #ifdef CONFIG_SMP
  765. .select_task_rq = select_task_rq_rt,
  766. #endif /* CONFIG_SMP */
  767. .check_preempt_curr = check_preempt_curr_rt,
  768. .pick_next_task = pick_next_task_rt,
  769. .put_prev_task = put_prev_task_rt,
  770. #ifdef CONFIG_SMP
  771. .load_balance = load_balance_rt,
  772. .move_one_task = move_one_task_rt,
  773. .set_cpus_allowed = set_cpus_allowed_rt,
  774. .join_domain = join_domain_rt,
  775. .leave_domain = leave_domain_rt,
  776. .pre_schedule = pre_schedule_rt,
  777. .post_schedule = post_schedule_rt,
  778. .task_wake_up = task_wake_up_rt,
  779. .switched_from = switched_from_rt,
  780. #endif
  781. .set_curr_task = set_curr_task_rt,
  782. .task_tick = task_tick_rt,
  783. .prio_changed = prio_changed_rt,
  784. .switched_to = switched_to_rt,
  785. };