/*
 * sched_rt.c
 *
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */
  5. #ifdef CONFIG_SMP
  6. static cpumask_t rt_overload_mask;
  7. static atomic_t rto_count;
  8. static inline int rt_overloaded(void)
  9. {
  10. return atomic_read(&rto_count);
  11. }
  12. static inline cpumask_t *rt_overload(void)
  13. {
  14. return &rt_overload_mask;
  15. }
  16. static inline void rt_set_overload(struct rq *rq)
  17. {
  18. rq->rt.overloaded = 1;
  19. cpu_set(rq->cpu, rt_overload_mask);
  20. /*
  21. * Make sure the mask is visible before we set
  22. * the overload count. That is checked to determine
  23. * if we should look at the mask. It would be a shame
  24. * if we looked at the mask, but the mask was not
  25. * updated yet.
  26. */
  27. wmb();
  28. atomic_inc(&rto_count);
  29. }
  30. static inline void rt_clear_overload(struct rq *rq)
  31. {
  32. /* the order here really doesn't matter */
  33. atomic_dec(&rto_count);
  34. cpu_clear(rq->cpu, rt_overload_mask);
  35. rq->rt.overloaded = 0;
  36. }
  37. static void update_rt_migration(struct rq *rq)
  38. {
  39. if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1))
  40. rt_set_overload(rq);
  41. else
  42. rt_clear_overload(rq);
  43. }
  44. #endif /* CONFIG_SMP */
  45. /*
  46. * Update the current task's runtime statistics. Skip current tasks that
  47. * are not in our scheduling class.
  48. */
  49. static void update_curr_rt(struct rq *rq)
  50. {
  51. struct task_struct *curr = rq->curr;
  52. u64 delta_exec;
  53. if (!task_has_rt_policy(curr))
  54. return;
  55. delta_exec = rq->clock - curr->se.exec_start;
  56. if (unlikely((s64)delta_exec < 0))
  57. delta_exec = 0;
  58. schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
  59. curr->se.sum_exec_runtime += delta_exec;
  60. curr->se.exec_start = rq->clock;
  61. cpuacct_charge(curr, delta_exec);
  62. }
  63. static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
  64. {
  65. WARN_ON(!rt_task(p));
  66. rq->rt.rt_nr_running++;
  67. #ifdef CONFIG_SMP
  68. if (p->prio < rq->rt.highest_prio)
  69. rq->rt.highest_prio = p->prio;
  70. if (p->nr_cpus_allowed > 1)
  71. rq->rt.rt_nr_migratory++;
  72. update_rt_migration(rq);
  73. #endif /* CONFIG_SMP */
  74. }
  75. static inline void dec_rt_tasks(struct task_struct *p, struct rq *rq)
  76. {
  77. WARN_ON(!rt_task(p));
  78. WARN_ON(!rq->rt.rt_nr_running);
  79. rq->rt.rt_nr_running--;
  80. #ifdef CONFIG_SMP
  81. if (rq->rt.rt_nr_running) {
  82. struct rt_prio_array *array;
  83. WARN_ON(p->prio < rq->rt.highest_prio);
  84. if (p->prio == rq->rt.highest_prio) {
  85. /* recalculate */
  86. array = &rq->rt.active;
  87. rq->rt.highest_prio =
  88. sched_find_first_bit(array->bitmap);
  89. } /* otherwise leave rq->highest prio alone */
  90. } else
  91. rq->rt.highest_prio = MAX_RT_PRIO;
  92. if (p->nr_cpus_allowed > 1)
  93. rq->rt.rt_nr_migratory--;
  94. update_rt_migration(rq);
  95. #endif /* CONFIG_SMP */
  96. }
  97. static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
  98. {
  99. struct rt_prio_array *array = &rq->rt.active;
  100. list_add_tail(&p->run_list, array->queue + p->prio);
  101. __set_bit(p->prio, array->bitmap);
  102. inc_cpu_load(rq, p->se.load.weight);
  103. inc_rt_tasks(p, rq);
  104. }
  105. /*
  106. * Adding/removing a task to/from a priority array:
  107. */
  108. static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
  109. {
  110. struct rt_prio_array *array = &rq->rt.active;
  111. update_curr_rt(rq);
  112. list_del(&p->run_list);
  113. if (list_empty(array->queue + p->prio))
  114. __clear_bit(p->prio, array->bitmap);
  115. dec_cpu_load(rq, p->se.load.weight);
  116. dec_rt_tasks(p, rq);
  117. }
  118. /*
  119. * Put task to the end of the run list without the overhead of dequeue
  120. * followed by enqueue.
  121. */
  122. static void requeue_task_rt(struct rq *rq, struct task_struct *p)
  123. {
  124. struct rt_prio_array *array = &rq->rt.active;
  125. list_move_tail(&p->run_list, array->queue + p->prio);
  126. }
  127. static void
  128. yield_task_rt(struct rq *rq)
  129. {
  130. requeue_task_rt(rq, rq->curr);
  131. }
  #ifdef CONFIG_SMP
  static int find_lowest_rq(struct task_struct *task);

  /*
   * Choose the CPU on which the waking RT task @p should be queued.
   * @sync is not used here.
   *
   * Returns the CPU picked by find_lowest_rq() when the task currently
   * running on @p's runqueue is itself an RT task and @p may run on more
   * than one CPU; in every other case (including find_lowest_rq()
   * finding nothing) @p stays on task_cpu(p).
   */
  static int select_task_rq_rt(struct task_struct *p, int sync)
  {
      struct rq *rq = task_rq(p);
      int target;

      /*
       * Without an RT task currently running here, or with a task that
       * cannot move, just let it ride on its affined runqueue; the
       * post-schedule router will push the preempted task away.
       */
      if (!unlikely(rt_task(rq->curr)) || !(p->nr_cpus_allowed > 1))
          return task_cpu(p);

      /*
       * The current task is an RT task, so try to wake this one up on
       * another runqueue.
       *
       * The aim is to avoid overloading runqueues, even when the woken
       * task has a higher priority than the running RT task.  RT tasks
       * behave differently from other tasks: a preempted one gets pushed
       * off to another queue.  Keeping the preempting task on this
       * cache-hot CPU would force the running RT task onto a cold CPU,
       * wasting its cache in the hope of saving some for a task that is
       * only just waking and probably has a cold cache anyway.
       */
      target = find_lowest_rq(p);
      if (target == -1)
          return task_cpu(p);

      return target;
  }
  #endif /* CONFIG_SMP */
  166. /*
  167. * Preempt the current task with a newly woken task if needed:
  168. */
  169. static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
  170. {
  171. if (p->prio < rq->curr->prio)
  172. resched_task(rq->curr);
  173. }
  174. static struct task_struct *pick_next_task_rt(struct rq *rq)
  175. {
  176. struct rt_prio_array *array = &rq->rt.active;
  177. struct task_struct *next;
  178. struct list_head *queue;
  179. int idx;
  180. idx = sched_find_first_bit(array->bitmap);
  181. if (idx >= MAX_RT_PRIO)
  182. return NULL;
  183. queue = array->queue + idx;
  184. next = list_entry(queue->next, struct task_struct, run_list);
  185. next->se.exec_start = rq->clock;
  186. return next;
  187. }
  188. static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
  189. {
  190. update_curr_rt(rq);
  191. p->se.exec_start = 0;
  192. }
  193. #ifdef CONFIG_SMP
  194. /* Only try algorithms three times */
  195. #define RT_MAX_TRIES 3
  196. static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
  197. static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
  198. static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
  199. {
  200. if (!task_running(rq, p) &&
  201. (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
  202. (p->nr_cpus_allowed > 1))
  203. return 1;
  204. return 0;
  205. }
  206. /* Return the second highest RT task, NULL otherwise */
  207. static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
  208. {
  209. struct rt_prio_array *array = &rq->rt.active;
  210. struct task_struct *next;
  211. struct list_head *queue;
  212. int idx;
  213. assert_spin_locked(&rq->lock);
  214. if (likely(rq->rt.rt_nr_running < 2))
  215. return NULL;
  216. idx = sched_find_first_bit(array->bitmap);
  217. if (unlikely(idx >= MAX_RT_PRIO)) {
  218. WARN_ON(1); /* rt_nr_running is bad */
  219. return NULL;
  220. }
  221. queue = array->queue + idx;
  222. BUG_ON(list_empty(queue));
  223. next = list_entry(queue->next, struct task_struct, run_list);
  224. if (unlikely(pick_rt_task(rq, next, cpu)))
  225. goto out;
  226. if (queue->next->next != queue) {
  227. /* same prio task */
  228. next = list_entry(queue->next->next, struct task_struct,
  229. run_list);
  230. if (pick_rt_task(rq, next, cpu))
  231. goto out;
  232. }
  233. retry:
  234. /* slower, but more flexible */
  235. idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
  236. if (unlikely(idx >= MAX_RT_PRIO))
  237. return NULL;
  238. queue = array->queue + idx;
  239. BUG_ON(list_empty(queue));
  240. list_for_each_entry(next, queue, run_list) {
  241. if (pick_rt_task(rq, next, cpu))
  242. goto out;
  243. }
  244. goto retry;
  245. out:
  246. return next;
  247. }
  /* Per-CPU scratch mask; find_lowest_rq() hands it to find_lowest_cpus(). */
  static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
  249. static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
  250. {
  251. int lowest_prio = -1;
  252. int lowest_cpu = -1;
  253. int count = 0;
  254. int cpu;
  255. cpus_and(*lowest_mask, cpu_online_map, task->cpus_allowed);
  256. /*
  257. * Scan each rq for the lowest prio.
  258. */
  259. for_each_cpu_mask(cpu, *lowest_mask) {
  260. struct rq *rq = cpu_rq(cpu);
  261. /* We look for lowest RT prio or non-rt CPU */
  262. if (rq->rt.highest_prio >= MAX_RT_PRIO) {
  263. /*
  264. * if we already found a low RT queue
  265. * and now we found this non-rt queue
  266. * clear the mask and set our bit.
  267. * Otherwise just return the queue as is
  268. * and the count==1 will cause the algorithm
  269. * to use the first bit found.
  270. */
  271. if (lowest_cpu != -1) {
  272. cpus_clear(*lowest_mask);
  273. cpu_set(rq->cpu, *lowest_mask);
  274. }
  275. return 1;
  276. }
  277. /* no locking for now */
  278. if ((rq->rt.highest_prio > task->prio)
  279. && (rq->rt.highest_prio >= lowest_prio)) {
  280. if (rq->rt.highest_prio > lowest_prio) {
  281. /* new low - clear old data */
  282. lowest_prio = rq->rt.highest_prio;
  283. lowest_cpu = cpu;
  284. count = 0;
  285. }
  286. count++;
  287. } else
  288. cpu_clear(cpu, *lowest_mask);
  289. }
  290. /*
  291. * Clear out all the set bits that represent
  292. * runqueues that were of higher prio than
  293. * the lowest_prio.
  294. */
  295. if (lowest_cpu > 0) {
  296. /*
  297. * Perhaps we could add another cpumask op to
  298. * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
  299. * Then that could be optimized to use memset and such.
  300. */
  301. for_each_cpu_mask(cpu, *lowest_mask) {
  302. if (cpu >= lowest_cpu)
  303. break;
  304. cpu_clear(cpu, *lowest_mask);
  305. }
  306. }
  307. return count;
  308. }
  309. static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
  310. {
  311. int first;
  312. /* "this_cpu" is cheaper to preempt than a remote processor */
  313. if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
  314. return this_cpu;
  315. first = first_cpu(*mask);
  316. if (first != NR_CPUS)
  317. return first;
  318. return -1;
  319. }
  320. static int find_lowest_rq(struct task_struct *task)
  321. {
  322. struct sched_domain *sd;
  323. cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
  324. int this_cpu = smp_processor_id();
  325. int cpu = task_cpu(task);
  326. int count = find_lowest_cpus(task, lowest_mask);
  327. if (!count)
  328. return -1; /* No targets found */
  329. /*
  330. * There is no sense in performing an optimal search if only one
  331. * target is found.
  332. */
  333. if (count == 1)
  334. return first_cpu(*lowest_mask);
  335. /*
  336. * At this point we have built a mask of cpus representing the
  337. * lowest priority tasks in the system. Now we want to elect
  338. * the best one based on our affinity and topology.
  339. *
  340. * We prioritize the last cpu that the task executed on since
  341. * it is most likely cache-hot in that location.
  342. */
  343. if (cpu_isset(cpu, *lowest_mask))
  344. return cpu;
  345. /*
  346. * Otherwise, we consult the sched_domains span maps to figure
  347. * out which cpu is logically closest to our hot cache data.
  348. */
  349. if (this_cpu == cpu)
  350. this_cpu = -1; /* Skip this_cpu opt if the same */
  351. for_each_domain(cpu, sd) {
  352. if (sd->flags & SD_WAKE_AFFINE) {
  353. cpumask_t domain_mask;
  354. int best_cpu;
  355. cpus_and(domain_mask, sd->span, *lowest_mask);
  356. best_cpu = pick_optimal_cpu(this_cpu,
  357. &domain_mask);
  358. if (best_cpu != -1)
  359. return best_cpu;
  360. }
  361. }
  362. /*
  363. * And finally, if there were no matches within the domains
  364. * just give the caller *something* to work with from the compatible
  365. * locations.
  366. */
  367. return pick_optimal_cpu(this_cpu, lowest_mask);
  368. }
  369. /* Will lock the rq it finds */
  370. static struct rq *find_lock_lowest_rq(struct task_struct *task,
  371. struct rq *rq)
  372. {
  373. struct rq *lowest_rq = NULL;
  374. int cpu;
  375. int tries;
  376. for (tries = 0; tries < RT_MAX_TRIES; tries++) {
  377. cpu = find_lowest_rq(task);
  378. if ((cpu == -1) || (cpu == rq->cpu))
  379. break;
  380. lowest_rq = cpu_rq(cpu);
  381. /* if the prio of this runqueue changed, try again */
  382. if (double_lock_balance(rq, lowest_rq)) {
  383. /*
  384. * We had to unlock the run queue. In
  385. * the mean time, task could have
  386. * migrated already or had its affinity changed.
  387. * Also make sure that it wasn't scheduled on its rq.
  388. */
  389. if (unlikely(task_rq(task) != rq ||
  390. !cpu_isset(lowest_rq->cpu, task->cpus_allowed) ||
  391. task_running(rq, task) ||
  392. !task->se.on_rq)) {
  393. spin_unlock(&lowest_rq->lock);
  394. lowest_rq = NULL;
  395. break;
  396. }
  397. }
  398. /* If this rq is still suitable use it. */
  399. if (lowest_rq->rt.highest_prio > task->prio)
  400. break;
  401. /* try again */
  402. spin_unlock(&lowest_rq->lock);
  403. lowest_rq = NULL;
  404. }
  405. return lowest_rq;
  406. }
  407. /*
  408. * If the current CPU has more than one RT task, see if the non
  409. * running task can migrate over to a CPU that is running a task
  410. * of lesser priority.
  411. */
  412. static int push_rt_task(struct rq *rq)
  413. {
  414. struct task_struct *next_task;
  415. struct rq *lowest_rq;
  416. int ret = 0;
  417. int paranoid = RT_MAX_TRIES;
  418. assert_spin_locked(&rq->lock);
  419. if (!rq->rt.overloaded)
  420. return 0;
  421. next_task = pick_next_highest_task_rt(rq, -1);
  422. if (!next_task)
  423. return 0;
  424. retry:
  425. if (unlikely(next_task == rq->curr)) {
  426. WARN_ON(1);
  427. return 0;
  428. }
  429. /*
  430. * It's possible that the next_task slipped in of
  431. * higher priority than current. If that's the case
  432. * just reschedule current.
  433. */
  434. if (unlikely(next_task->prio < rq->curr->prio)) {
  435. resched_task(rq->curr);
  436. return 0;
  437. }
  438. /* We might release rq lock */
  439. get_task_struct(next_task);
  440. /* find_lock_lowest_rq locks the rq if found */
  441. lowest_rq = find_lock_lowest_rq(next_task, rq);
  442. if (!lowest_rq) {
  443. struct task_struct *task;
  444. /*
  445. * find lock_lowest_rq releases rq->lock
  446. * so it is possible that next_task has changed.
  447. * If it has, then try again.
  448. */
  449. task = pick_next_highest_task_rt(rq, -1);
  450. if (unlikely(task != next_task) && task && paranoid--) {
  451. put_task_struct(next_task);
  452. next_task = task;
  453. goto retry;
  454. }
  455. goto out;
  456. }
  457. assert_spin_locked(&lowest_rq->lock);
  458. deactivate_task(rq, next_task, 0);
  459. set_task_cpu(next_task, lowest_rq->cpu);
  460. activate_task(lowest_rq, next_task, 0);
  461. resched_task(lowest_rq->curr);
  462. spin_unlock(&lowest_rq->lock);
  463. ret = 1;
  464. out:
  465. put_task_struct(next_task);
  466. return ret;
  467. }
  /*
   * Keep pushing RT tasks off @rq until push_rt_task() reports that it
   * moved nothing.
   *
   * TODO: Currently we just use the second highest prio task on the
   *       queue, and stop when it can't migrate (or there are no more
   *       RT tasks).  A lower priority RT task may have a different
   *       affinity than the higher one, so the lower task could possibly
   *       migrate where the higher priority task could not.  We
   *       currently ignore this issue.  Enhancements are welcome!
   */
  static void push_rt_tasks(struct rq *rq)
  {
      /* push_rt_task will return true if it moved an RT */
      for (;;) {
          if (!push_rt_task(rq))
              break;
      }
  }
  484. static int pull_rt_task(struct rq *this_rq)
  485. {
  486. struct task_struct *next;
  487. struct task_struct *p;
  488. struct rq *src_rq;
  489. cpumask_t *rto_cpumask;
  490. int this_cpu = this_rq->cpu;
  491. int cpu;
  492. int ret = 0;
  493. assert_spin_locked(&this_rq->lock);
  494. /*
  495. * If cpusets are used, and we have overlapping
  496. * run queue cpusets, then this algorithm may not catch all.
  497. * This is just the price you pay on trying to keep
  498. * dirtying caches down on large SMP machines.
  499. */
  500. if (likely(!rt_overloaded()))
  501. return 0;
  502. next = pick_next_task_rt(this_rq);
  503. rto_cpumask = rt_overload();
  504. for_each_cpu_mask(cpu, *rto_cpumask) {
  505. if (this_cpu == cpu)
  506. continue;
  507. src_rq = cpu_rq(cpu);
  508. if (unlikely(src_rq->rt.rt_nr_running <= 1)) {
  509. /*
  510. * It is possible that overlapping cpusets
  511. * will miss clearing a non overloaded runqueue.
  512. * Clear it now.
  513. */
  514. if (double_lock_balance(this_rq, src_rq)) {
  515. /* unlocked our runqueue lock */
  516. struct task_struct *old_next = next;
  517. next = pick_next_task_rt(this_rq);
  518. if (next != old_next)
  519. ret = 1;
  520. }
  521. if (likely(src_rq->rt.rt_nr_running <= 1))
  522. /*
  523. * Small chance that this_rq->curr changed
  524. * but it's really harmless here.
  525. */
  526. rt_clear_overload(this_rq);
  527. else
  528. /*
  529. * Heh, the src_rq is now overloaded, since
  530. * we already have the src_rq lock, go straight
  531. * to pulling tasks from it.
  532. */
  533. goto try_pulling;
  534. spin_unlock(&src_rq->lock);
  535. continue;
  536. }
  537. /*
  538. * We can potentially drop this_rq's lock in
  539. * double_lock_balance, and another CPU could
  540. * steal our next task - hence we must cause
  541. * the caller to recalculate the next task
  542. * in that case:
  543. */
  544. if (double_lock_balance(this_rq, src_rq)) {
  545. struct task_struct *old_next = next;
  546. next = pick_next_task_rt(this_rq);
  547. if (next != old_next)
  548. ret = 1;
  549. }
  550. /*
  551. * Are there still pullable RT tasks?
  552. */
  553. if (src_rq->rt.rt_nr_running <= 1) {
  554. spin_unlock(&src_rq->lock);
  555. continue;
  556. }
  557. try_pulling:
  558. p = pick_next_highest_task_rt(src_rq, this_cpu);
  559. /*
  560. * Do we have an RT task that preempts
  561. * the to-be-scheduled task?
  562. */
  563. if (p && (!next || (p->prio < next->prio))) {
  564. WARN_ON(p == src_rq->curr);
  565. WARN_ON(!p->se.on_rq);
  566. /*
  567. * There's a chance that p is higher in priority
  568. * than what's currently running on its cpu.
  569. * This is just that p is wakeing up and hasn't
  570. * had a chance to schedule. We only pull
  571. * p if it is lower in priority than the
  572. * current task on the run queue or
  573. * this_rq next task is lower in prio than
  574. * the current task on that rq.
  575. */
  576. if (p->prio < src_rq->curr->prio ||
  577. (next && next->prio < src_rq->curr->prio))
  578. goto bail;
  579. ret = 1;
  580. deactivate_task(src_rq, p, 0);
  581. set_task_cpu(p, this_cpu);
  582. activate_task(this_rq, p, 0);
  583. /*
  584. * We continue with the search, just in
  585. * case there's an even higher prio task
  586. * in another runqueue. (low likelyhood
  587. * but possible)
  588. */
  589. /*
  590. * Update next so that we won't pick a task
  591. * on another cpu with a priority lower (or equal)
  592. * than the one we just picked.
  593. */
  594. next = p;
  595. }
  596. bail:
  597. spin_unlock(&src_rq->lock);
  598. }
  599. return ret;
  600. }
  601. static void schedule_balance_rt(struct rq *rq,
  602. struct task_struct *prev)
  603. {
  604. /* Try to pull RT tasks here if we lower this rq's prio */
  605. if (unlikely(rt_task(prev)) &&
  606. rq->rt.highest_prio > prev->prio)
  607. pull_rt_task(rq);
  608. }
  609. static void schedule_tail_balance_rt(struct rq *rq)
  610. {
  611. /*
  612. * If we have more than one rt_task queued, then
  613. * see if we can push the other rt_tasks off to other CPUS.
  614. * Note we may release the rq lock, and since
  615. * the lock was owned by prev, we need to release it
  616. * first via finish_lock_switch and then reaquire it here.
  617. */
  618. if (unlikely(rq->rt.overloaded)) {
  619. spin_lock_irq(&rq->lock);
  620. push_rt_tasks(rq);
  621. spin_unlock_irq(&rq->lock);
  622. }
  623. }
  624. static void wakeup_balance_rt(struct rq *rq, struct task_struct *p)
  625. {
  626. if (unlikely(rt_task(p)) &&
  627. !task_running(rq, p) &&
  628. (p->prio >= rq->rt.highest_prio) &&
  629. rq->rt.overloaded)
  630. push_rt_tasks(rq);
  631. }
  632. static unsigned long
  633. load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
  634. unsigned long max_load_move,
  635. struct sched_domain *sd, enum cpu_idle_type idle,
  636. int *all_pinned, int *this_best_prio)
  637. {
  638. /* don't touch RT tasks */
  639. return 0;
  640. }
  641. static int
  642. move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
  643. struct sched_domain *sd, enum cpu_idle_type idle)
  644. {
  645. /* don't touch RT tasks */
  646. return 0;
  647. }
  648. static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask)
  649. {
  650. int weight = cpus_weight(*new_mask);
  651. BUG_ON(!rt_task(p));
  652. /*
  653. * Update the migration status of the RQ if we have an RT task
  654. * which is running AND changing its weight value.
  655. */
  656. if (p->se.on_rq && (weight != p->nr_cpus_allowed)) {
  657. struct rq *rq = task_rq(p);
  658. if ((p->nr_cpus_allowed <= 1) && (weight > 1))
  659. rq->rt.rt_nr_migratory++;
  660. else if((p->nr_cpus_allowed > 1) && (weight <= 1)) {
  661. BUG_ON(!rq->rt.rt_nr_migratory);
  662. rq->rt.rt_nr_migratory--;
  663. }
  664. update_rt_migration(rq);
  665. }
  666. p->cpus_allowed = *new_mask;
  667. p->nr_cpus_allowed = weight;
  668. }
  669. #else /* CONFIG_SMP */
  670. # define schedule_tail_balance_rt(rq) do { } while (0)
  671. # define schedule_balance_rt(rq, prev) do { } while (0)
  672. # define wakeup_balance_rt(rq, p) do { } while (0)
  673. #endif /* CONFIG_SMP */
  674. static void task_tick_rt(struct rq *rq, struct task_struct *p)
  675. {
  676. update_curr_rt(rq);
  677. /*
  678. * RR tasks need a special form of timeslice management.
  679. * FIFO tasks have no timeslices.
  680. */
  681. if (p->policy != SCHED_RR)
  682. return;
  683. if (--p->time_slice)
  684. return;
  685. p->time_slice = DEF_TIMESLICE;
  686. /*
  687. * Requeue to the end of queue if we are not the only element
  688. * on the queue:
  689. */
  690. if (p->run_list.prev != p->run_list.next) {
  691. requeue_task_rt(rq, p);
  692. set_tsk_need_resched(p);
  693. }
  694. }
  695. static void set_curr_task_rt(struct rq *rq)
  696. {
  697. struct task_struct *p = rq->curr;
  698. p->se.exec_start = rq->clock;
  699. }
  700. const struct sched_class rt_sched_class = {
  701. .next = &fair_sched_class,
  702. .enqueue_task = enqueue_task_rt,
  703. .dequeue_task = dequeue_task_rt,
  704. .yield_task = yield_task_rt,
  705. #ifdef CONFIG_SMP
  706. .select_task_rq = select_task_rq_rt,
  707. #endif /* CONFIG_SMP */
  708. .check_preempt_curr = check_preempt_curr_rt,
  709. .pick_next_task = pick_next_task_rt,
  710. .put_prev_task = put_prev_task_rt,
  711. #ifdef CONFIG_SMP
  712. .load_balance = load_balance_rt,
  713. .move_one_task = move_one_task_rt,
  714. .set_cpus_allowed = set_cpus_allowed_rt,
  715. #endif
  716. .set_curr_task = set_curr_task_rt,
  717. .task_tick = task_tick_rt,
  718. };