sched.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. /* sched.c - SPU scheduler.
  2. *
  3. * Copyright (C) IBM 2005
  4. * Author: Mark Nutter <mnutter@us.ibm.com>
  5. *
  6. * SPU scheduler, based on Linux thread priority. For now use
  7. * a simple "cooperative" yield model with no preemption. SPU
  8. * scheduling will eventually be preemptive: When a thread with
  9. * a higher static priority gets ready to run, then an active SPU
  10. * context will be preempted and returned to the waitq.
  11. *
  12. * This program is free software; you can redistribute it and/or modify
  13. * it under the terms of the GNU General Public License as published by
  14. * the Free Software Foundation; either version 2, or (at your option)
  15. * any later version.
  16. *
  17. * This program is distributed in the hope that it will be useful,
  18. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  20. * GNU General Public License for more details.
  21. *
  22. * You should have received a copy of the GNU General Public License
  23. * along with this program; if not, write to the Free Software
  24. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  25. */
  26. #undef DEBUG
  27. #include <linux/config.h>
  28. #include <linux/module.h>
  29. #include <linux/errno.h>
  30. #include <linux/sched.h>
  31. #include <linux/kernel.h>
  32. #include <linux/mm.h>
  33. #include <linux/completion.h>
  34. #include <linux/vmalloc.h>
  35. #include <linux/smp.h>
  36. #include <linux/smp_lock.h>
  37. #include <linux/stddef.h>
  38. #include <linux/unistd.h>
  39. #include <asm/io.h>
  40. #include <asm/mmu_context.h>
  41. #include <asm/spu.h>
  42. #include <asm/spu_csa.h>
  43. #include "spufs.h"
  44. #define SPU_MIN_TIMESLICE (100 * HZ / 1000)
  45. #define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
  46. struct spu_prio_array {
  47. atomic_t nr_blocked;
  48. unsigned long bitmap[SPU_BITMAP_SIZE];
  49. wait_queue_head_t waitq[MAX_PRIO];
  50. };
  51. /* spu_runqueue - This is the main runqueue data structure for SPUs. */
  52. struct spu_runqueue {
  53. struct semaphore sem;
  54. unsigned long nr_active;
  55. unsigned long nr_idle;
  56. unsigned long nr_switches;
  57. struct list_head active_list;
  58. struct list_head idle_list;
  59. struct spu_prio_array prio;
  60. };
  61. static struct spu_runqueue *spu_runqueues = NULL;
  62. static inline struct spu_runqueue *spu_rq(void)
  63. {
  64. /* Future: make this a per-NODE array,
  65. * and use cpu_to_node(smp_processor_id())
  66. */
  67. return spu_runqueues;
  68. }
  69. static inline struct spu *del_idle(struct spu_runqueue *rq)
  70. {
  71. struct spu *spu;
  72. BUG_ON(rq->nr_idle <= 0);
  73. BUG_ON(list_empty(&rq->idle_list));
  74. /* Future: Move SPU out of low-power SRI state. */
  75. spu = list_entry(rq->idle_list.next, struct spu, sched_list);
  76. list_del_init(&spu->sched_list);
  77. rq->nr_idle--;
  78. return spu;
  79. }
  80. static inline void del_active(struct spu_runqueue *rq, struct spu *spu)
  81. {
  82. BUG_ON(rq->nr_active <= 0);
  83. BUG_ON(list_empty(&rq->active_list));
  84. list_del_init(&spu->sched_list);
  85. rq->nr_active--;
  86. }
  87. static inline void add_idle(struct spu_runqueue *rq, struct spu *spu)
  88. {
  89. /* Future: Put SPU into low-power SRI state. */
  90. list_add_tail(&spu->sched_list, &rq->idle_list);
  91. rq->nr_idle++;
  92. }
  93. static inline void add_active(struct spu_runqueue *rq, struct spu *spu)
  94. {
  95. rq->nr_active++;
  96. rq->nr_switches++;
  97. list_add_tail(&spu->sched_list, &rq->active_list);
  98. }
  99. static void prio_wakeup(struct spu_runqueue *rq)
  100. {
  101. if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) {
  102. int best = sched_find_first_bit(rq->prio.bitmap);
  103. if (best < MAX_PRIO) {
  104. wait_queue_head_t *wq = &rq->prio.waitq[best];
  105. wake_up_interruptible_nr(wq, 1);
  106. }
  107. }
  108. }
  109. static void prio_wait(struct spu_runqueue *rq, struct spu_context *ctx,
  110. u64 flags)
  111. {
  112. int prio = current->prio;
  113. wait_queue_head_t *wq = &rq->prio.waitq[prio];
  114. DEFINE_WAIT(wait);
  115. __set_bit(prio, rq->prio.bitmap);
  116. atomic_inc(&rq->prio.nr_blocked);
  117. prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
  118. if (!signal_pending(current)) {
  119. up(&rq->sem);
  120. up_write(&ctx->state_sema);
  121. pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
  122. current->pid, current->prio);
  123. schedule();
  124. down_write(&ctx->state_sema);
  125. down(&rq->sem);
  126. }
  127. finish_wait(wq, &wait);
  128. atomic_dec(&rq->prio.nr_blocked);
  129. if (!waitqueue_active(wq))
  130. __clear_bit(prio, rq->prio.bitmap);
  131. }
  132. static inline int is_best_prio(struct spu_runqueue *rq)
  133. {
  134. int best_prio;
  135. best_prio = sched_find_first_bit(rq->prio.bitmap);
  136. return (current->prio < best_prio) ? 1 : 0;
  137. }
  138. static inline void mm_needs_global_tlbie(struct mm_struct *mm)
  139. {
  140. /* Global TLBIE broadcast required with SPEs. */
  141. #if (NR_CPUS > 1)
  142. __cpus_setall(&mm->cpu_vm_mask, NR_CPUS);
  143. #else
  144. __cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */
  145. #endif
  146. }
  147. static inline void bind_context(struct spu *spu, struct spu_context *ctx)
  148. {
  149. pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid,
  150. spu->number);
  151. spu->ctx = ctx;
  152. spu->flags = 0;
  153. ctx->flags = 0;
  154. ctx->spu = spu;
  155. ctx->ops = &spu_hw_ops;
  156. spu->pid = current->pid;
  157. spu->prio = current->prio;
  158. spu->mm = ctx->owner;
  159. mm_needs_global_tlbie(spu->mm);
  160. spu->ibox_callback = spufs_ibox_callback;
  161. spu->wbox_callback = spufs_wbox_callback;
  162. spu->stop_callback = spufs_stop_callback;
  163. mb();
  164. spu_unmap_mappings(ctx);
  165. spu_restore(&ctx->csa, spu);
  166. spu->timestamp = jiffies;
  167. }
  168. static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
  169. {
  170. pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__,
  171. spu->pid, spu->number);
  172. spu_unmap_mappings(ctx);
  173. spu_save(&ctx->csa, spu);
  174. spu->timestamp = jiffies;
  175. ctx->state = SPU_STATE_SAVED;
  176. spu->ibox_callback = NULL;
  177. spu->wbox_callback = NULL;
  178. spu->stop_callback = NULL;
  179. spu->mm = NULL;
  180. spu->pid = 0;
  181. spu->prio = MAX_PRIO;
  182. ctx->ops = &spu_backing_ops;
  183. ctx->spu = NULL;
  184. ctx->flags = 0;
  185. spu->flags = 0;
  186. spu->ctx = NULL;
  187. }
  188. static void spu_reaper(void *data)
  189. {
  190. struct spu_context *ctx = data;
  191. struct spu *spu;
  192. down_write(&ctx->state_sema);
  193. spu = ctx->spu;
  194. if (spu && test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
  195. if (atomic_read(&spu->rq->prio.nr_blocked)) {
  196. pr_debug("%s: spu=%d\n", __func__, spu->number);
  197. ctx->ops->runcntl_stop(ctx);
  198. spu_deactivate(ctx);
  199. wake_up_all(&ctx->stop_wq);
  200. } else {
  201. clear_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
  202. }
  203. }
  204. up_write(&ctx->state_sema);
  205. put_spu_context(ctx);
  206. }
  207. static void schedule_spu_reaper(struct spu_runqueue *rq, struct spu *spu)
  208. {
  209. struct spu_context *ctx = get_spu_context(spu->ctx);
  210. unsigned long now = jiffies;
  211. unsigned long expire = spu->timestamp + SPU_MIN_TIMESLICE;
  212. set_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
  213. INIT_WORK(&ctx->reap_work, spu_reaper, ctx);
  214. if (time_after(now, expire))
  215. schedule_work(&ctx->reap_work);
  216. else
  217. schedule_delayed_work(&ctx->reap_work, expire - now);
  218. }
  219. static void check_preempt_active(struct spu_runqueue *rq)
  220. {
  221. struct list_head *p;
  222. struct spu *worst = NULL;
  223. list_for_each(p, &rq->active_list) {
  224. struct spu *spu = list_entry(p, struct spu, sched_list);
  225. struct spu_context *ctx = spu->ctx;
  226. if (!test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
  227. if (!worst || (spu->prio > worst->prio)) {
  228. worst = spu;
  229. }
  230. }
  231. }
  232. if (worst && (current->prio < worst->prio))
  233. schedule_spu_reaper(rq, worst);
  234. }
  235. static struct spu *get_idle_spu(struct spu_context *ctx, u64 flags)
  236. {
  237. struct spu_runqueue *rq;
  238. struct spu *spu = NULL;
  239. rq = spu_rq();
  240. down(&rq->sem);
  241. for (;;) {
  242. if (rq->nr_idle > 0) {
  243. if (is_best_prio(rq)) {
  244. /* Fall through. */
  245. spu = del_idle(rq);
  246. break;
  247. } else {
  248. prio_wakeup(rq);
  249. up(&rq->sem);
  250. yield();
  251. if (signal_pending(current)) {
  252. return NULL;
  253. }
  254. rq = spu_rq();
  255. down(&rq->sem);
  256. continue;
  257. }
  258. } else {
  259. check_preempt_active(rq);
  260. prio_wait(rq, ctx, flags);
  261. if (signal_pending(current)) {
  262. prio_wakeup(rq);
  263. spu = NULL;
  264. break;
  265. }
  266. continue;
  267. }
  268. }
  269. up(&rq->sem);
  270. return spu;
  271. }
  272. static void put_idle_spu(struct spu *spu)
  273. {
  274. struct spu_runqueue *rq = spu->rq;
  275. down(&rq->sem);
  276. add_idle(rq, spu);
  277. prio_wakeup(rq);
  278. up(&rq->sem);
  279. }
  280. static int get_active_spu(struct spu *spu)
  281. {
  282. struct spu_runqueue *rq = spu->rq;
  283. struct list_head *p;
  284. struct spu *tmp;
  285. int rc = 0;
  286. down(&rq->sem);
  287. list_for_each(p, &rq->active_list) {
  288. tmp = list_entry(p, struct spu, sched_list);
  289. if (tmp == spu) {
  290. del_active(rq, spu);
  291. rc = 1;
  292. break;
  293. }
  294. }
  295. up(&rq->sem);
  296. return rc;
  297. }
  298. static void put_active_spu(struct spu *spu)
  299. {
  300. struct spu_runqueue *rq = spu->rq;
  301. down(&rq->sem);
  302. add_active(rq, spu);
  303. up(&rq->sem);
  304. }
/* Lock order:
 *	spu_activate() & spu_deactivate() require the
 *	caller to have down_write(&ctx->state_sema).
 *
 *	The rq->sem is briefly held (inside or outside a
 *	given ctx lock) for list management, but is never
 *	held during save/restore.
 */
  313. int spu_activate(struct spu_context *ctx, u64 flags)
  314. {
  315. struct spu *spu;
  316. if (ctx->spu)
  317. return 0;
  318. spu = get_idle_spu(ctx, flags);
  319. if (!spu)
  320. return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
  321. bind_context(spu, ctx);
  322. /*
  323. * We're likely to wait for interrupts on the same
  324. * CPU that we are now on, so send them here.
  325. */
  326. spu_irq_setaffinity(spu, raw_smp_processor_id());
  327. put_active_spu(spu);
  328. return 0;
  329. }
  330. void spu_deactivate(struct spu_context *ctx)
  331. {
  332. struct spu *spu;
  333. int needs_idle;
  334. spu = ctx->spu;
  335. if (!spu)
  336. return;
  337. needs_idle = get_active_spu(spu);
  338. unbind_context(spu, ctx);
  339. if (needs_idle)
  340. put_idle_spu(spu);
  341. }
  342. void spu_yield(struct spu_context *ctx)
  343. {
  344. struct spu *spu;
  345. int need_yield = 0;
  346. down_write(&ctx->state_sema);
  347. spu = ctx->spu;
  348. if (spu && (sched_find_first_bit(spu->rq->prio.bitmap) < MAX_PRIO)) {
  349. pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number);
  350. spu_deactivate(ctx);
  351. ctx->state = SPU_STATE_SAVED;
  352. need_yield = 1;
  353. } else if (spu) {
  354. spu->prio = MAX_PRIO;
  355. }
  356. up_write(&ctx->state_sema);
  357. if (unlikely(need_yield))
  358. yield();
  359. }
  360. int __init spu_sched_init(void)
  361. {
  362. struct spu_runqueue *rq;
  363. struct spu *spu;
  364. int i;
  365. rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL);
  366. if (!rq) {
  367. printk(KERN_WARNING "%s: Unable to allocate runqueues.\n",
  368. __FUNCTION__);
  369. return 1;
  370. }
  371. memset(rq, 0, sizeof(struct spu_runqueue));
  372. init_MUTEX(&rq->sem);
  373. INIT_LIST_HEAD(&rq->active_list);
  374. INIT_LIST_HEAD(&rq->idle_list);
  375. rq->nr_active = 0;
  376. rq->nr_idle = 0;
  377. rq->nr_switches = 0;
  378. atomic_set(&rq->prio.nr_blocked, 0);
  379. for (i = 0; i < MAX_PRIO; i++) {
  380. init_waitqueue_head(&rq->prio.waitq[i]);
  381. __clear_bit(i, rq->prio.bitmap);
  382. }
  383. __set_bit(MAX_PRIO, rq->prio.bitmap);
  384. for (;;) {
  385. spu = spu_alloc();
  386. if (!spu)
  387. break;
  388. pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number);
  389. add_idle(rq, spu);
  390. spu->rq = rq;
  391. spu->timestamp = jiffies;
  392. }
  393. if (!rq->nr_idle) {
  394. printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__);
  395. kfree(rq);
  396. return 1;
  397. }
  398. return 0;
  399. }
  400. void __exit spu_sched_exit(void)
  401. {
  402. struct spu_runqueue *rq = spu_rq();
  403. struct spu *spu;
  404. if (!rq) {
  405. printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__);
  406. return;
  407. }
  408. while (rq->nr_idle > 0) {
  409. spu = del_idle(rq);
  410. if (!spu)
  411. break;
  412. spu_free(spu);
  413. }
  414. kfree(rq);
  415. }