sched_rt.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863
  1. /*
  2. * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
  3. * policies)
  4. */
  5. #ifdef CONFIG_SMP
  6. /*
  7. * The "RT overload" flag: it gets set if a CPU has more than
  8. * one runnable RT task.
  9. */
  10. static cpumask_t rt_overload_mask;
  11. static atomic_t rto_count;
  12. static inline int rt_overloaded(void)
  13. {
  14. return atomic_read(&rto_count);
  15. }
  16. static inline cpumask_t *rt_overload(void)
  17. {
  18. return &rt_overload_mask;
  19. }
  20. static inline void rt_set_overload(struct rq *rq)
  21. {
  22. rq->rt.overloaded = 1;
  23. cpu_set(rq->cpu, rt_overload_mask);
  24. /*
  25. * Make sure the mask is visible before we set
  26. * the overload count. That is checked to determine
  27. * if we should look at the mask. It would be a shame
  28. * if we looked at the mask, but the mask was not
  29. * updated yet.
  30. */
  31. wmb();
  32. atomic_inc(&rto_count);
  33. }
  34. static inline void rt_clear_overload(struct rq *rq)
  35. {
  36. /* the order here really doesn't matter */
  37. atomic_dec(&rto_count);
  38. cpu_clear(rq->cpu, rt_overload_mask);
  39. rq->rt.overloaded = 0;
  40. }
  41. static void update_rt_migration(struct rq *rq)
  42. {
  43. if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1))
  44. rt_set_overload(rq);
  45. else
  46. rt_clear_overload(rq);
  47. }
  48. #endif /* CONFIG_SMP */
  49. /*
  50. * Update the current task's runtime statistics. Skip current tasks that
  51. * are not in our scheduling class.
  52. */
  53. static void update_curr_rt(struct rq *rq)
  54. {
  55. struct task_struct *curr = rq->curr;
  56. u64 delta_exec;
  57. if (!task_has_rt_policy(curr))
  58. return;
  59. delta_exec = rq->clock - curr->se.exec_start;
  60. if (unlikely((s64)delta_exec < 0))
  61. delta_exec = 0;
  62. schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
  63. curr->se.sum_exec_runtime += delta_exec;
  64. curr->se.exec_start = rq->clock;
  65. cpuacct_charge(curr, delta_exec);
  66. }
  67. static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
  68. {
  69. WARN_ON(!rt_task(p));
  70. rq->rt.rt_nr_running++;
  71. #ifdef CONFIG_SMP
  72. if (p->prio < rq->rt.highest_prio)
  73. rq->rt.highest_prio = p->prio;
  74. if (p->nr_cpus_allowed > 1)
  75. rq->rt.rt_nr_migratory++;
  76. update_rt_migration(rq);
  77. #endif /* CONFIG_SMP */
  78. }
  79. static inline void dec_rt_tasks(struct task_struct *p, struct rq *rq)
  80. {
  81. WARN_ON(!rt_task(p));
  82. WARN_ON(!rq->rt.rt_nr_running);
  83. rq->rt.rt_nr_running--;
  84. #ifdef CONFIG_SMP
  85. if (rq->rt.rt_nr_running) {
  86. struct rt_prio_array *array;
  87. WARN_ON(p->prio < rq->rt.highest_prio);
  88. if (p->prio == rq->rt.highest_prio) {
  89. /* recalculate */
  90. array = &rq->rt.active;
  91. rq->rt.highest_prio =
  92. sched_find_first_bit(array->bitmap);
  93. } /* otherwise leave rq->highest prio alone */
  94. } else
  95. rq->rt.highest_prio = MAX_RT_PRIO;
  96. if (p->nr_cpus_allowed > 1)
  97. rq->rt.rt_nr_migratory--;
  98. update_rt_migration(rq);
  99. #endif /* CONFIG_SMP */
  100. }
  101. static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
  102. {
  103. struct rt_prio_array *array = &rq->rt.active;
  104. list_add_tail(&p->run_list, array->queue + p->prio);
  105. __set_bit(p->prio, array->bitmap);
  106. inc_cpu_load(rq, p->se.load.weight);
  107. inc_rt_tasks(p, rq);
  108. }
  109. /*
  110. * Adding/removing a task to/from a priority array:
  111. */
  112. static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
  113. {
  114. struct rt_prio_array *array = &rq->rt.active;
  115. update_curr_rt(rq);
  116. list_del(&p->run_list);
  117. if (list_empty(array->queue + p->prio))
  118. __clear_bit(p->prio, array->bitmap);
  119. dec_cpu_load(rq, p->se.load.weight);
  120. dec_rt_tasks(p, rq);
  121. }
  122. /*
  123. * Put task to the end of the run list without the overhead of dequeue
  124. * followed by enqueue.
  125. */
  126. static void requeue_task_rt(struct rq *rq, struct task_struct *p)
  127. {
  128. struct rt_prio_array *array = &rq->rt.active;
  129. list_move_tail(&p->run_list, array->queue + p->prio);
  130. }
  131. static void
  132. yield_task_rt(struct rq *rq)
  133. {
  134. requeue_task_rt(rq, rq->curr);
  135. }
  136. #ifdef CONFIG_SMP
  137. static int find_lowest_rq(struct task_struct *task);
  138. static int select_task_rq_rt(struct task_struct *p, int sync)
  139. {
  140. struct rq *rq = task_rq(p);
  141. /*
  142. * If the current task is an RT task, then
  143. * try to see if we can wake this RT task up on another
  144. * runqueue. Otherwise simply start this RT task
  145. * on its current runqueue.
  146. *
  147. * We want to avoid overloading runqueues. Even if
  148. * the RT task is of higher priority than the current RT task.
  149. * RT tasks behave differently than other tasks. If
  150. * one gets preempted, we try to push it off to another queue.
  151. * So trying to keep a preempting RT task on the same
  152. * cache hot CPU will force the running RT task to
  153. * a cold CPU. So we waste all the cache for the lower
  154. * RT task in hopes of saving some of a RT task
  155. * that is just being woken and probably will have
  156. * cold cache anyway.
  157. */
  158. if (unlikely(rt_task(rq->curr)) &&
  159. (p->nr_cpus_allowed > 1)) {
  160. int cpu = find_lowest_rq(p);
  161. return (cpu == -1) ? task_cpu(p) : cpu;
  162. }
  163. /*
  164. * Otherwise, just let it ride on the affined RQ and the
  165. * post-schedule router will push the preempted task away
  166. */
  167. return task_cpu(p);
  168. }
  169. #endif /* CONFIG_SMP */
  170. /*
  171. * Preempt the current task with a newly woken task if needed:
  172. */
  173. static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
  174. {
  175. if (p->prio < rq->curr->prio)
  176. resched_task(rq->curr);
  177. }
  178. static struct task_struct *pick_next_task_rt(struct rq *rq)
  179. {
  180. struct rt_prio_array *array = &rq->rt.active;
  181. struct task_struct *next;
  182. struct list_head *queue;
  183. int idx;
  184. idx = sched_find_first_bit(array->bitmap);
  185. if (idx >= MAX_RT_PRIO)
  186. return NULL;
  187. queue = array->queue + idx;
  188. next = list_entry(queue->next, struct task_struct, run_list);
  189. next->se.exec_start = rq->clock;
  190. return next;
  191. }
  192. static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
  193. {
  194. update_curr_rt(rq);
  195. p->se.exec_start = 0;
  196. }
  197. #ifdef CONFIG_SMP
  /* Upper bound on how many times the push/lock retry loops will iterate. */
  #define RT_MAX_TRIES 3

  /* Defined elsewhere; declared here because this file uses them first. */
  static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
  static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
  202. static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
  203. {
  204. if (!task_running(rq, p) &&
  205. (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
  206. (p->nr_cpus_allowed > 1))
  207. return 1;
  208. return 0;
  209. }
  210. /* Return the second highest RT task, NULL otherwise */
  211. static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
  212. {
  213. struct rt_prio_array *array = &rq->rt.active;
  214. struct task_struct *next;
  215. struct list_head *queue;
  216. int idx;
  217. assert_spin_locked(&rq->lock);
  218. if (likely(rq->rt.rt_nr_running < 2))
  219. return NULL;
  220. idx = sched_find_first_bit(array->bitmap);
  221. if (unlikely(idx >= MAX_RT_PRIO)) {
  222. WARN_ON(1); /* rt_nr_running is bad */
  223. return NULL;
  224. }
  225. queue = array->queue + idx;
  226. BUG_ON(list_empty(queue));
  227. next = list_entry(queue->next, struct task_struct, run_list);
  228. if (unlikely(pick_rt_task(rq, next, cpu)))
  229. goto out;
  230. if (queue->next->next != queue) {
  231. /* same prio task */
  232. next = list_entry(queue->next->next, struct task_struct,
  233. run_list);
  234. if (pick_rt_task(rq, next, cpu))
  235. goto out;
  236. }
  237. retry:
  238. /* slower, but more flexible */
  239. idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
  240. if (unlikely(idx >= MAX_RT_PRIO))
  241. return NULL;
  242. queue = array->queue + idx;
  243. BUG_ON(list_empty(queue));
  244. list_for_each_entry(next, queue, run_list) {
  245. if (pick_rt_task(rq, next, cpu))
  246. goto out;
  247. }
  248. goto retry;
  249. out:
  250. return next;
  251. }
  /*
   * Per-CPU scratch cpumask.  find_lowest_rq() hands it to
   * find_lowest_cpus() as the output mask of candidate CPUs.
   */
  static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
  253. static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
  254. {
  255. int lowest_prio = -1;
  256. int lowest_cpu = -1;
  257. int count = 0;
  258. int cpu;
  259. cpus_and(*lowest_mask, cpu_online_map, task->cpus_allowed);
  260. /*
  261. * Scan each rq for the lowest prio.
  262. */
  263. for_each_cpu_mask(cpu, *lowest_mask) {
  264. struct rq *rq = cpu_rq(cpu);
  265. /* We look for lowest RT prio or non-rt CPU */
  266. if (rq->rt.highest_prio >= MAX_RT_PRIO) {
  267. /*
  268. * if we already found a low RT queue
  269. * and now we found this non-rt queue
  270. * clear the mask and set our bit.
  271. * Otherwise just return the queue as is
  272. * and the count==1 will cause the algorithm
  273. * to use the first bit found.
  274. */
  275. if (lowest_cpu != -1) {
  276. cpus_clear(*lowest_mask);
  277. cpu_set(rq->cpu, *lowest_mask);
  278. }
  279. return 1;
  280. }
  281. /* no locking for now */
  282. if ((rq->rt.highest_prio > task->prio)
  283. && (rq->rt.highest_prio >= lowest_prio)) {
  284. if (rq->rt.highest_prio > lowest_prio) {
  285. /* new low - clear old data */
  286. lowest_prio = rq->rt.highest_prio;
  287. lowest_cpu = cpu;
  288. count = 0;
  289. }
  290. count++;
  291. } else
  292. cpu_clear(cpu, *lowest_mask);
  293. }
  294. /*
  295. * Clear out all the set bits that represent
  296. * runqueues that were of higher prio than
  297. * the lowest_prio.
  298. */
  299. if (lowest_cpu > 0) {
  300. /*
  301. * Perhaps we could add another cpumask op to
  302. * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
  303. * Then that could be optimized to use memset and such.
  304. */
  305. for_each_cpu_mask(cpu, *lowest_mask) {
  306. if (cpu >= lowest_cpu)
  307. break;
  308. cpu_clear(cpu, *lowest_mask);
  309. }
  310. }
  311. return count;
  312. }
  313. static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
  314. {
  315. int first;
  316. /* "this_cpu" is cheaper to preempt than a remote processor */
  317. if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
  318. return this_cpu;
  319. first = first_cpu(*mask);
  320. if (first != NR_CPUS)
  321. return first;
  322. return -1;
  323. }
  324. static int find_lowest_rq(struct task_struct *task)
  325. {
  326. struct sched_domain *sd;
  327. cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
  328. int this_cpu = smp_processor_id();
  329. int cpu = task_cpu(task);
  330. int count = find_lowest_cpus(task, lowest_mask);
  331. if (!count)
  332. return -1; /* No targets found */
  333. /*
  334. * There is no sense in performing an optimal search if only one
  335. * target is found.
  336. */
  337. if (count == 1)
  338. return first_cpu(*lowest_mask);
  339. /*
  340. * At this point we have built a mask of cpus representing the
  341. * lowest priority tasks in the system. Now we want to elect
  342. * the best one based on our affinity and topology.
  343. *
  344. * We prioritize the last cpu that the task executed on since
  345. * it is most likely cache-hot in that location.
  346. */
  347. if (cpu_isset(cpu, *lowest_mask))
  348. return cpu;
  349. /*
  350. * Otherwise, we consult the sched_domains span maps to figure
  351. * out which cpu is logically closest to our hot cache data.
  352. */
  353. if (this_cpu == cpu)
  354. this_cpu = -1; /* Skip this_cpu opt if the same */
  355. for_each_domain(cpu, sd) {
  356. if (sd->flags & SD_WAKE_AFFINE) {
  357. cpumask_t domain_mask;
  358. int best_cpu;
  359. cpus_and(domain_mask, sd->span, *lowest_mask);
  360. best_cpu = pick_optimal_cpu(this_cpu,
  361. &domain_mask);
  362. if (best_cpu != -1)
  363. return best_cpu;
  364. }
  365. }
  366. /*
  367. * And finally, if there were no matches within the domains
  368. * just give the caller *something* to work with from the compatible
  369. * locations.
  370. */
  371. return pick_optimal_cpu(this_cpu, lowest_mask);
  372. }
  373. /* Will lock the rq it finds */
  374. static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
  375. {
  376. struct rq *lowest_rq = NULL;
  377. int tries;
  378. int cpu;
  379. for (tries = 0; tries < RT_MAX_TRIES; tries++) {
  380. cpu = find_lowest_rq(task);
  381. if ((cpu == -1) || (cpu == rq->cpu))
  382. break;
  383. lowest_rq = cpu_rq(cpu);
  384. /* if the prio of this runqueue changed, try again */
  385. if (double_lock_balance(rq, lowest_rq)) {
  386. /*
  387. * We had to unlock the run queue. In
  388. * the mean time, task could have
  389. * migrated already or had its affinity changed.
  390. * Also make sure that it wasn't scheduled on its rq.
  391. */
  392. if (unlikely(task_rq(task) != rq ||
  393. !cpu_isset(lowest_rq->cpu,
  394. task->cpus_allowed) ||
  395. task_running(rq, task) ||
  396. !task->se.on_rq)) {
  397. spin_unlock(&lowest_rq->lock);
  398. lowest_rq = NULL;
  399. break;
  400. }
  401. }
  402. /* If this rq is still suitable use it. */
  403. if (lowest_rq->rt.highest_prio > task->prio)
  404. break;
  405. /* try again */
  406. spin_unlock(&lowest_rq->lock);
  407. lowest_rq = NULL;
  408. }
  409. return lowest_rq;
  410. }
  411. /*
  412. * If the current CPU has more than one RT task, see if the non
  413. * running task can migrate over to a CPU that is running a task
  414. * of lesser priority.
  415. */
  416. static int push_rt_task(struct rq *rq)
  417. {
  418. struct task_struct *next_task;
  419. struct rq *lowest_rq;
  420. int ret = 0;
  421. int paranoid = RT_MAX_TRIES;
  422. assert_spin_locked(&rq->lock);
  423. if (!rq->rt.overloaded)
  424. return 0;
  425. next_task = pick_next_highest_task_rt(rq, -1);
  426. if (!next_task)
  427. return 0;
  428. retry:
  429. if (unlikely(next_task == rq->curr)) {
  430. WARN_ON(1);
  431. return 0;
  432. }
  433. /*
  434. * It's possible that the next_task slipped in of
  435. * higher priority than current. If that's the case
  436. * just reschedule current.
  437. */
  438. if (unlikely(next_task->prio < rq->curr->prio)) {
  439. resched_task(rq->curr);
  440. return 0;
  441. }
  442. /* We might release rq lock */
  443. get_task_struct(next_task);
  444. /* find_lock_lowest_rq locks the rq if found */
  445. lowest_rq = find_lock_lowest_rq(next_task, rq);
  446. if (!lowest_rq) {
  447. struct task_struct *task;
  448. /*
  449. * find lock_lowest_rq releases rq->lock
  450. * so it is possible that next_task has changed.
  451. * If it has, then try again.
  452. */
  453. task = pick_next_highest_task_rt(rq, -1);
  454. if (unlikely(task != next_task) && task && paranoid--) {
  455. put_task_struct(next_task);
  456. next_task = task;
  457. goto retry;
  458. }
  459. goto out;
  460. }
  461. assert_spin_locked(&lowest_rq->lock);
  462. deactivate_task(rq, next_task, 0);
  463. set_task_cpu(next_task, lowest_rq->cpu);
  464. activate_task(lowest_rq, next_task, 0);
  465. resched_task(lowest_rq->curr);
  466. spin_unlock(&lowest_rq->lock);
  467. ret = 1;
  468. out:
  469. put_task_struct(next_task);
  470. return ret;
  471. }
  /*
   * Push tasks off @rq one at a time until push_rt_task() reports that
   * nothing was moved.
   *
   * TODO: only the best candidate is considered on each round, and the
   * loop stops as soon as that one cannot be placed.  A lower priority
   * RT task with a different affinity might still be movable when the
   * higher one is not; that case is currently ignored.  Enhancements
   * are welcome!
   */
  static void push_rt_tasks(struct rq *rq)
  {
      for (;;) {
          if (!push_rt_task(rq))
              break;
      }
  }
  488. static int pull_rt_task(struct rq *this_rq)
  489. {
  490. struct task_struct *next;
  491. struct task_struct *p;
  492. struct rq *src_rq;
  493. cpumask_t *rto_cpumask;
  494. int this_cpu = this_rq->cpu;
  495. int cpu;
  496. int ret = 0;
  497. assert_spin_locked(&this_rq->lock);
  498. /*
  499. * If cpusets are used, and we have overlapping
  500. * run queue cpusets, then this algorithm may not catch all.
  501. * This is just the price you pay on trying to keep
  502. * dirtying caches down on large SMP machines.
  503. */
  504. if (likely(!rt_overloaded()))
  505. return 0;
  506. next = pick_next_task_rt(this_rq);
  507. rto_cpumask = rt_overload();
  508. for_each_cpu_mask(cpu, *rto_cpumask) {
  509. if (this_cpu == cpu)
  510. continue;
  511. src_rq = cpu_rq(cpu);
  512. if (unlikely(src_rq->rt.rt_nr_running <= 1)) {
  513. /*
  514. * It is possible that overlapping cpusets
  515. * will miss clearing a non overloaded runqueue.
  516. * Clear it now.
  517. */
  518. if (double_lock_balance(this_rq, src_rq)) {
  519. /* unlocked our runqueue lock */
  520. struct task_struct *old_next = next;
  521. next = pick_next_task_rt(this_rq);
  522. if (next != old_next)
  523. ret = 1;
  524. }
  525. if (likely(src_rq->rt.rt_nr_running <= 1))
  526. /*
  527. * Small chance that this_rq->curr changed
  528. * but it's really harmless here.
  529. */
  530. rt_clear_overload(this_rq);
  531. else
  532. /*
  533. * Heh, the src_rq is now overloaded, since
  534. * we already have the src_rq lock, go straight
  535. * to pulling tasks from it.
  536. */
  537. goto try_pulling;
  538. spin_unlock(&src_rq->lock);
  539. continue;
  540. }
  541. /*
  542. * We can potentially drop this_rq's lock in
  543. * double_lock_balance, and another CPU could
  544. * steal our next task - hence we must cause
  545. * the caller to recalculate the next task
  546. * in that case:
  547. */
  548. if (double_lock_balance(this_rq, src_rq)) {
  549. struct task_struct *old_next = next;
  550. next = pick_next_task_rt(this_rq);
  551. if (next != old_next)
  552. ret = 1;
  553. }
  554. /*
  555. * Are there still pullable RT tasks?
  556. */
  557. if (src_rq->rt.rt_nr_running <= 1) {
  558. spin_unlock(&src_rq->lock);
  559. continue;
  560. }
  561. try_pulling:
  562. p = pick_next_highest_task_rt(src_rq, this_cpu);
  563. /*
  564. * Do we have an RT task that preempts
  565. * the to-be-scheduled task?
  566. */
  567. if (p && (!next || (p->prio < next->prio))) {
  568. WARN_ON(p == src_rq->curr);
  569. WARN_ON(!p->se.on_rq);
  570. /*
  571. * There's a chance that p is higher in priority
  572. * than what's currently running on its cpu.
  573. * This is just that p is wakeing up and hasn't
  574. * had a chance to schedule. We only pull
  575. * p if it is lower in priority than the
  576. * current task on the run queue or
  577. * this_rq next task is lower in prio than
  578. * the current task on that rq.
  579. */
  580. if (p->prio < src_rq->curr->prio ||
  581. (next && next->prio < src_rq->curr->prio))
  582. goto bail;
  583. ret = 1;
  584. deactivate_task(src_rq, p, 0);
  585. set_task_cpu(p, this_cpu);
  586. activate_task(this_rq, p, 0);
  587. /*
  588. * We continue with the search, just in
  589. * case there's an even higher prio task
  590. * in another runqueue. (low likelyhood
  591. * but possible)
  592. */
  593. /*
  594. * Update next so that we won't pick a task
  595. * on another cpu with a priority lower (or equal)
  596. * than the one we just picked.
  597. */
  598. next = p;
  599. }
  600. bail:
  601. spin_unlock(&src_rq->lock);
  602. }
  603. return ret;
  604. }
  605. static void schedule_balance_rt(struct rq *rq,
  606. struct task_struct *prev)
  607. {
  608. /* Try to pull RT tasks here if we lower this rq's prio */
  609. if (unlikely(rt_task(prev)) &&
  610. rq->rt.highest_prio > prev->prio)
  611. pull_rt_task(rq);
  612. }
  613. static void schedule_tail_balance_rt(struct rq *rq)
  614. {
  615. /*
  616. * If we have more than one rt_task queued, then
  617. * see if we can push the other rt_tasks off to other CPUS.
  618. * Note we may release the rq lock, and since
  619. * the lock was owned by prev, we need to release it
  620. * first via finish_lock_switch and then reaquire it here.
  621. */
  622. if (unlikely(rq->rt.overloaded)) {
  623. spin_lock_irq(&rq->lock);
  624. push_rt_tasks(rq);
  625. spin_unlock_irq(&rq->lock);
  626. }
  627. }
  628. static void wakeup_balance_rt(struct rq *rq, struct task_struct *p)
  629. {
  630. if (unlikely(rt_task(p)) &&
  631. !task_running(rq, p) &&
  632. (p->prio >= rq->rt.highest_prio) &&
  633. rq->rt.overloaded)
  634. push_rt_tasks(rq);
  635. }
  636. static unsigned long
  637. load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
  638. unsigned long max_load_move,
  639. struct sched_domain *sd, enum cpu_idle_type idle,
  640. int *all_pinned, int *this_best_prio)
  641. {
  642. /* don't touch RT tasks */
  643. return 0;
  644. }
  645. static int
  646. move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
  647. struct sched_domain *sd, enum cpu_idle_type idle)
  648. {
  649. /* don't touch RT tasks */
  650. return 0;
  651. }
  652. static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask)
  653. {
  654. int weight = cpus_weight(*new_mask);
  655. BUG_ON(!rt_task(p));
  656. /*
  657. * Update the migration status of the RQ if we have an RT task
  658. * which is running AND changing its weight value.
  659. */
  660. if (p->se.on_rq && (weight != p->nr_cpus_allowed)) {
  661. struct rq *rq = task_rq(p);
  662. if ((p->nr_cpus_allowed <= 1) && (weight > 1)) {
  663. rq->rt.rt_nr_migratory++;
  664. } else if ((p->nr_cpus_allowed > 1) && (weight <= 1)) {
  665. BUG_ON(!rq->rt.rt_nr_migratory);
  666. rq->rt.rt_nr_migratory--;
  667. }
  668. update_rt_migration(rq);
  669. }
  670. p->cpus_allowed = *new_mask;
  671. p->nr_cpus_allowed = weight;
  672. }
  673. #else /* CONFIG_SMP */
  /* Without CONFIG_SMP there is nowhere to balance to: compile to no-ops. */
  # define schedule_tail_balance_rt(rq) do { } while (0)
  # define schedule_balance_rt(rq, prev) do { } while (0)
  # define wakeup_balance_rt(rq, p) do { } while (0)
  677. #endif /* CONFIG_SMP */
  678. static void task_tick_rt(struct rq *rq, struct task_struct *p)
  679. {
  680. update_curr_rt(rq);
  681. /*
  682. * RR tasks need a special form of timeslice management.
  683. * FIFO tasks have no timeslices.
  684. */
  685. if (p->policy != SCHED_RR)
  686. return;
  687. if (--p->time_slice)
  688. return;
  689. p->time_slice = DEF_TIMESLICE;
  690. /*
  691. * Requeue to the end of queue if we are not the only element
  692. * on the queue:
  693. */
  694. if (p->run_list.prev != p->run_list.next) {
  695. requeue_task_rt(rq, p);
  696. set_tsk_need_resched(p);
  697. }
  698. }
  699. static void set_curr_task_rt(struct rq *rq)
  700. {
  701. struct task_struct *p = rq->curr;
  702. p->se.exec_start = rq->clock;
  703. }
  704. const struct sched_class rt_sched_class = {
  705. .next = &fair_sched_class,
  706. .enqueue_task = enqueue_task_rt,
  707. .dequeue_task = dequeue_task_rt,
  708. .yield_task = yield_task_rt,
  709. #ifdef CONFIG_SMP
  710. .select_task_rq = select_task_rq_rt,
  711. #endif /* CONFIG_SMP */
  712. .check_preempt_curr = check_preempt_curr_rt,
  713. .pick_next_task = pick_next_task_rt,
  714. .put_prev_task = put_prev_task_rt,
  715. #ifdef CONFIG_SMP
  716. .load_balance = load_balance_rt,
  717. .move_one_task = move_one_task_rt,
  718. .set_cpus_allowed = set_cpus_allowed_rt,
  719. #endif
  720. .set_curr_task = set_curr_task_rt,
  721. .task_tick = task_tick_rt,
  722. };