sched_rt.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. /*
  2. * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
  3. * policies)
  4. */
  5. /*
  6. * Update the current task's runtime statistics. Skip current tasks that
  7. * are not in our scheduling class.
  8. */
  9. static void update_curr_rt(struct rq *rq)
  10. {
  11. struct task_struct *curr = rq->curr;
  12. u64 delta_exec;
  13. if (!task_has_rt_policy(curr))
  14. return;
  15. delta_exec = rq->clock - curr->se.exec_start;
  16. if (unlikely((s64)delta_exec < 0))
  17. delta_exec = 0;
  18. schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
  19. curr->se.sum_exec_runtime += delta_exec;
  20. curr->se.exec_start = rq->clock;
  21. cpuacct_charge(curr, delta_exec);
  22. }
  23. static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
  24. {
  25. WARN_ON(!rt_task(p));
  26. rq->rt.rt_nr_running++;
  27. #ifdef CONFIG_SMP
  28. if (p->prio < rq->rt.highest_prio)
  29. rq->rt.highest_prio = p->prio;
  30. #endif /* CONFIG_SMP */
  31. }
  32. static inline void dec_rt_tasks(struct task_struct *p, struct rq *rq)
  33. {
  34. WARN_ON(!rt_task(p));
  35. WARN_ON(!rq->rt.rt_nr_running);
  36. rq->rt.rt_nr_running--;
  37. #ifdef CONFIG_SMP
  38. if (rq->rt.rt_nr_running) {
  39. struct rt_prio_array *array;
  40. WARN_ON(p->prio < rq->rt.highest_prio);
  41. if (p->prio == rq->rt.highest_prio) {
  42. /* recalculate */
  43. array = &rq->rt.active;
  44. rq->rt.highest_prio =
  45. sched_find_first_bit(array->bitmap);
  46. } /* otherwise leave rq->highest prio alone */
  47. } else
  48. rq->rt.highest_prio = MAX_RT_PRIO;
  49. #endif /* CONFIG_SMP */
  50. }
  51. static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
  52. {
  53. struct rt_prio_array *array = &rq->rt.active;
  54. list_add_tail(&p->run_list, array->queue + p->prio);
  55. __set_bit(p->prio, array->bitmap);
  56. inc_cpu_load(rq, p->se.load.weight);
  57. inc_rt_tasks(p, rq);
  58. }
  59. /*
  60. * Adding/removing a task to/from a priority array:
  61. */
  62. static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
  63. {
  64. struct rt_prio_array *array = &rq->rt.active;
  65. update_curr_rt(rq);
  66. list_del(&p->run_list);
  67. if (list_empty(array->queue + p->prio))
  68. __clear_bit(p->prio, array->bitmap);
  69. dec_cpu_load(rq, p->se.load.weight);
  70. dec_rt_tasks(p, rq);
  71. }
  72. /*
  73. * Put task to the end of the run list without the overhead of dequeue
  74. * followed by enqueue.
  75. */
  76. static void requeue_task_rt(struct rq *rq, struct task_struct *p)
  77. {
  78. struct rt_prio_array *array = &rq->rt.active;
  79. list_move_tail(&p->run_list, array->queue + p->prio);
  80. }
  81. static void
  82. yield_task_rt(struct rq *rq)
  83. {
  84. requeue_task_rt(rq, rq->curr);
  85. }
  86. /*
  87. * Preempt the current task with a newly woken task if needed:
  88. */
  89. static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
  90. {
  91. if (p->prio < rq->curr->prio)
  92. resched_task(rq->curr);
  93. }
  94. static struct task_struct *pick_next_task_rt(struct rq *rq)
  95. {
  96. struct rt_prio_array *array = &rq->rt.active;
  97. struct task_struct *next;
  98. struct list_head *queue;
  99. int idx;
  100. idx = sched_find_first_bit(array->bitmap);
  101. if (idx >= MAX_RT_PRIO)
  102. return NULL;
  103. queue = array->queue + idx;
  104. next = list_entry(queue->next, struct task_struct, run_list);
  105. next->se.exec_start = rq->clock;
  106. return next;
  107. }
  108. static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
  109. {
  110. update_curr_rt(rq);
  111. p->se.exec_start = 0;
  112. }
  113. #ifdef CONFIG_SMP
  /*
   * Upper bound on retries in the RT push code below: it caps the search
   * loop in find_lock_lowest_rq() and the retry counter in push_rt_task().
   */
  #define RT_MAX_TRIES 3
  /* Forward declarations of helpers used by the push code below. */
  static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
  static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
  118. /* Return the second highest RT task, NULL otherwise */
  119. static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
  120. {
  121. struct rt_prio_array *array = &rq->rt.active;
  122. struct task_struct *next;
  123. struct list_head *queue;
  124. int idx;
  125. assert_spin_locked(&rq->lock);
  126. if (likely(rq->rt.rt_nr_running < 2))
  127. return NULL;
  128. idx = sched_find_first_bit(array->bitmap);
  129. if (unlikely(idx >= MAX_RT_PRIO)) {
  130. WARN_ON(1); /* rt_nr_running is bad */
  131. return NULL;
  132. }
  133. queue = array->queue + idx;
  134. next = list_entry(queue->next, struct task_struct, run_list);
  135. if (unlikely(next != rq->curr))
  136. return next;
  137. if (queue->next->next != queue) {
  138. /* same prio task */
  139. next = list_entry(queue->next->next, struct task_struct, run_list);
  140. return next;
  141. }
  142. /* slower, but more flexible */
  143. idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
  144. if (unlikely(idx >= MAX_RT_PRIO)) {
  145. WARN_ON(1); /* rt_nr_running was 2 and above! */
  146. return NULL;
  147. }
  148. queue = array->queue + idx;
  149. next = list_entry(queue->next, struct task_struct, run_list);
  150. return next;
  151. }
  152. static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
  153. /* Will lock the rq it finds */
  154. static struct rq *find_lock_lowest_rq(struct task_struct *task,
  155. struct rq *this_rq)
  156. {
  157. struct rq *lowest_rq = NULL;
  158. int cpu;
  159. int tries;
  160. cpumask_t *cpu_mask = &__get_cpu_var(local_cpu_mask);
  161. cpus_and(*cpu_mask, cpu_online_map, task->cpus_allowed);
  162. for (tries = 0; tries < RT_MAX_TRIES; tries++) {
  163. /*
  164. * Scan each rq for the lowest prio.
  165. */
  166. for_each_cpu_mask(cpu, *cpu_mask) {
  167. struct rq *rq = &per_cpu(runqueues, cpu);
  168. if (cpu == this_rq->cpu)
  169. continue;
  170. /* We look for lowest RT prio or non-rt CPU */
  171. if (rq->rt.highest_prio >= MAX_RT_PRIO) {
  172. lowest_rq = rq;
  173. break;
  174. }
  175. /* no locking for now */
  176. if (rq->rt.highest_prio > task->prio &&
  177. (!lowest_rq || rq->rt.highest_prio > lowest_rq->rt.highest_prio)) {
  178. lowest_rq = rq;
  179. }
  180. }
  181. if (!lowest_rq)
  182. break;
  183. /* if the prio of this runqueue changed, try again */
  184. if (double_lock_balance(this_rq, lowest_rq)) {
  185. /*
  186. * We had to unlock the run queue. In
  187. * the mean time, task could have
  188. * migrated already or had its affinity changed.
  189. * Also make sure that it wasn't scheduled on its rq.
  190. */
  191. if (unlikely(task_rq(task) != this_rq ||
  192. !cpu_isset(lowest_rq->cpu, task->cpus_allowed) ||
  193. task_running(this_rq, task) ||
  194. !task->se.on_rq)) {
  195. spin_unlock(&lowest_rq->lock);
  196. lowest_rq = NULL;
  197. break;
  198. }
  199. }
  200. /* If this rq is still suitable use it. */
  201. if (lowest_rq->rt.highest_prio > task->prio)
  202. break;
  203. /* try again */
  204. spin_unlock(&lowest_rq->lock);
  205. lowest_rq = NULL;
  206. }
  207. return lowest_rq;
  208. }
  209. /*
  210. * If the current CPU has more than one RT task, see if the non
  211. * running task can migrate over to a CPU that is running a task
  212. * of lesser priority.
  213. */
  214. static int push_rt_task(struct rq *this_rq)
  215. {
  216. struct task_struct *next_task;
  217. struct rq *lowest_rq;
  218. int ret = 0;
  219. int paranoid = RT_MAX_TRIES;
  220. assert_spin_locked(&this_rq->lock);
  221. next_task = pick_next_highest_task_rt(this_rq);
  222. if (!next_task)
  223. return 0;
  224. retry:
  225. if (unlikely(next_task == this_rq->curr))
  226. return 0;
  227. /*
  228. * It's possible that the next_task slipped in of
  229. * higher priority than current. If that's the case
  230. * just reschedule current.
  231. */
  232. if (unlikely(next_task->prio < this_rq->curr->prio)) {
  233. resched_task(this_rq->curr);
  234. return 0;
  235. }
  236. /* We might release this_rq lock */
  237. get_task_struct(next_task);
  238. /* find_lock_lowest_rq locks the rq if found */
  239. lowest_rq = find_lock_lowest_rq(next_task, this_rq);
  240. if (!lowest_rq) {
  241. struct task_struct *task;
  242. /*
  243. * find lock_lowest_rq releases this_rq->lock
  244. * so it is possible that next_task has changed.
  245. * If it has, then try again.
  246. */
  247. task = pick_next_highest_task_rt(this_rq);
  248. if (unlikely(task != next_task) && task && paranoid--) {
  249. put_task_struct(next_task);
  250. next_task = task;
  251. goto retry;
  252. }
  253. goto out;
  254. }
  255. assert_spin_locked(&lowest_rq->lock);
  256. deactivate_task(this_rq, next_task, 0);
  257. set_task_cpu(next_task, lowest_rq->cpu);
  258. activate_task(lowest_rq, next_task, 0);
  259. resched_task(lowest_rq->curr);
  260. spin_unlock(&lowest_rq->lock);
  261. ret = 1;
  262. out:
  263. put_task_struct(next_task);
  264. return ret;
  265. }
  /*
   * Keep pushing RT tasks away from @rq until push_rt_task() reports that
   * it moved nothing.
   *
   * TODO: Currently we just use the second highest prio task on
   *       the queue, and stop when it can't migrate (or there's
   *       no more RT tasks). There may be a case where a lower
   *       priority RT task has a different affinity than the
   *       higher RT task. In this case the lower RT task could
   *       possibly be able to migrate whereas the higher priority
   *       RT task could not. We currently ignore this issue.
   *       Enhancements are welcome!
   */
  static void push_rt_tasks(struct rq *rq)
  {
      int moved;

      /* push_rt_task will return true if it moved an RT */
      do {
          moved = push_rt_task(rq);
      } while (moved);
  }
  282. static void schedule_tail_balance_rt(struct rq *rq)
  283. {
  284. /*
  285. * If we have more than one rt_task queued, then
  286. * see if we can push the other rt_tasks off to other CPUS.
  287. * Note we may release the rq lock, and since
  288. * the lock was owned by prev, we need to release it
  289. * first via finish_lock_switch and then reaquire it here.
  290. */
  291. if (unlikely(rq->rt.rt_nr_running > 1)) {
  292. spin_lock_irq(&rq->lock);
  293. push_rt_tasks(rq);
  294. spin_unlock_irq(&rq->lock);
  295. }
  296. }
  297. /*
  298. * Load-balancing iterator. Note: while the runqueue stays locked
  299. * during the whole iteration, the current task might be
  300. * dequeued so the iterator has to be dequeue-safe. Here we
  301. * achieve that by always pre-iterating before returning
  302. * the current task:
  303. */
  304. static struct task_struct *load_balance_start_rt(void *arg)
  305. {
  306. struct rq *rq = arg;
  307. struct rt_prio_array *array = &rq->rt.active;
  308. struct list_head *head, *curr;
  309. struct task_struct *p;
  310. int idx;
  311. idx = sched_find_first_bit(array->bitmap);
  312. if (idx >= MAX_RT_PRIO)
  313. return NULL;
  314. head = array->queue + idx;
  315. curr = head->prev;
  316. p = list_entry(curr, struct task_struct, run_list);
  317. curr = curr->prev;
  318. rq->rt.rt_load_balance_idx = idx;
  319. rq->rt.rt_load_balance_head = head;
  320. rq->rt.rt_load_balance_curr = curr;
  321. return p;
  322. }
  323. static struct task_struct *load_balance_next_rt(void *arg)
  324. {
  325. struct rq *rq = arg;
  326. struct rt_prio_array *array = &rq->rt.active;
  327. struct list_head *head, *curr;
  328. struct task_struct *p;
  329. int idx;
  330. idx = rq->rt.rt_load_balance_idx;
  331. head = rq->rt.rt_load_balance_head;
  332. curr = rq->rt.rt_load_balance_curr;
  333. /*
  334. * If we arrived back to the head again then
  335. * iterate to the next queue (if any):
  336. */
  337. if (unlikely(head == curr)) {
  338. int next_idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
  339. if (next_idx >= MAX_RT_PRIO)
  340. return NULL;
  341. idx = next_idx;
  342. head = array->queue + idx;
  343. curr = head->prev;
  344. rq->rt.rt_load_balance_idx = idx;
  345. rq->rt.rt_load_balance_head = head;
  346. }
  347. p = list_entry(curr, struct task_struct, run_list);
  348. curr = curr->prev;
  349. rq->rt.rt_load_balance_curr = curr;
  350. return p;
  351. }
  352. static unsigned long
  353. load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
  354. unsigned long max_load_move,
  355. struct sched_domain *sd, enum cpu_idle_type idle,
  356. int *all_pinned, int *this_best_prio)
  357. {
  358. struct rq_iterator rt_rq_iterator;
  359. rt_rq_iterator.start = load_balance_start_rt;
  360. rt_rq_iterator.next = load_balance_next_rt;
  361. /* pass 'busiest' rq argument into
  362. * load_balance_[start|next]_rt iterators
  363. */
  364. rt_rq_iterator.arg = busiest;
  365. return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd,
  366. idle, all_pinned, this_best_prio, &rt_rq_iterator);
  367. }
  368. static int
  369. move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
  370. struct sched_domain *sd, enum cpu_idle_type idle)
  371. {
  372. struct rq_iterator rt_rq_iterator;
  373. rt_rq_iterator.start = load_balance_start_rt;
  374. rt_rq_iterator.next = load_balance_next_rt;
  375. rt_rq_iterator.arg = busiest;
  376. return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
  377. &rt_rq_iterator);
  378. }
  379. #else /* CONFIG_SMP */
  /* Without CONFIG_SMP the RT push code is compiled out; expand to nothing. */
  # define schedule_tail_balance_rt(rq) do { } while (0)
  381. #endif /* CONFIG_SMP */
  382. static void task_tick_rt(struct rq *rq, struct task_struct *p)
  383. {
  384. update_curr_rt(rq);
  385. /*
  386. * RR tasks need a special form of timeslice management.
  387. * FIFO tasks have no timeslices.
  388. */
  389. if (p->policy != SCHED_RR)
  390. return;
  391. if (--p->time_slice)
  392. return;
  393. p->time_slice = DEF_TIMESLICE;
  394. /*
  395. * Requeue to the end of queue if we are not the only element
  396. * on the queue:
  397. */
  398. if (p->run_list.prev != p->run_list.next) {
  399. requeue_task_rt(rq, p);
  400. set_tsk_need_resched(p);
  401. }
  402. }
  403. static void set_curr_task_rt(struct rq *rq)
  404. {
  405. struct task_struct *p = rq->curr;
  406. p->se.exec_start = rq->clock;
  407. }
  408. const struct sched_class rt_sched_class = {
  409. .next = &fair_sched_class,
  410. .enqueue_task = enqueue_task_rt,
  411. .dequeue_task = dequeue_task_rt,
  412. .yield_task = yield_task_rt,
  413. .check_preempt_curr = check_preempt_curr_rt,
  414. .pick_next_task = pick_next_task_rt,
  415. .put_prev_task = put_prev_task_rt,
  416. #ifdef CONFIG_SMP
  417. .load_balance = load_balance_rt,
  418. .move_one_task = move_one_task_rt,
  419. #endif
  420. .set_curr_task = set_curr_task_rt,
  421. .task_tick = task_tick_rt,
  422. };