/* sched.c - SPU scheduler.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * 2006-03-31	NUMA domains added.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/numa.h>
#include <linux/mutex.h>
#include <linux/notifier.h>

#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include <asm/spu_priv1.h>
#include "spufs.h"

#define SPU_MIN_TIMESLICE	(100 * HZ / 1000)

struct spu_prio_array {
	DECLARE_BITMAP(bitmap, MAX_PRIO);
	struct list_head runq[MAX_PRIO];
	spinlock_t runq_lock;
	struct list_head active_list[MAX_NUMNODES];
	struct mutex active_mutex[MAX_NUMNODES];
};

static struct spu_prio_array *spu_prio;
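
/**
 * node_allowed - check whether the current task may use spus on a node
 * @node: node to check
 *
 * Returns 1 if @node has online cpus and the calling task's cpu
 * affinity mask allows at least one of them, 0 otherwise.
 */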
static inline int node_allowed(int node)
{
	cpumask_t mask;

	if (!nr_cpus_node(node))
		return 0;
	mask = node_to_cpumask(node);
	if (!cpus_intersects(mask, current->cpus_allowed))
		return 0;
	return 1;
}

/**
 * spu_add_to_active_list - add spu to active list
 * @spu: spu to add to the active list
 */
static void spu_add_to_active_list(struct spu *spu)
{
	mutex_lock(&spu_prio->active_mutex[spu->node]);
	list_add_tail(&spu->list, &spu_prio->active_list[spu->node]);
	mutex_unlock(&spu_prio->active_mutex[spu->node]);
}

/**
 * spu_remove_from_active_list - remove spu from active list
 * @spu: spu to remove from the active list
 */
static void spu_remove_from_active_list(struct spu *spu)
{
	int node = spu->node;

	mutex_lock(&spu_prio->active_mutex[node]);
	list_del_init(&spu->list);
	mutex_unlock(&spu_prio->active_mutex[node]);
}
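
/**
 * mm_needs_global_tlbie - force broadcast tlbie for an mm
 * @mm: address space that will be used by an spu
 *
 * Mark more than one cpu in mm->cpu_vm_mask so that TLB invalidations
 * for this mm are always broadcast, as required when SPEs share the
 * address space.
 */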
static inline void mm_needs_global_tlbie(struct mm_struct *mm)
{
	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;

	/* Global TLBIE broadcast required with SPEs. */
	__cpus_setall(&mm->cpu_vm_mask, nr);
}
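
/*
 * Notifier chain for reporting spu context switches.  spu_switch_notify()
 * is called on every bind and unbind with the context's object_id (or 0
 * when the spu goes idle) and the spu itself as arguments.
 */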
static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);

static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
{
	blocking_notifier_call_chain(&spu_switch_notifier,
				     ctx ? ctx->object_id : 0, spu);
}

int spu_switch_event_register(struct notifier_block *n)
{
	return blocking_notifier_chain_register(&spu_switch_notifier, n);
}

int spu_switch_event_unregister(struct notifier_block *n)
{
	return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
}

/**
 * spu_bind_context - bind spu context to physical spu
 * @spu: physical spu to bind to
 * @ctx: context to bind
 */
static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
		 spu->number, spu->node);
	spu->ctx = ctx;
	spu->flags = 0;
	ctx->spu = spu;
	ctx->ops = &spu_hw_ops;
	spu->pid = current->pid;
	spu->mm = ctx->owner;
	mm_needs_global_tlbie(spu->mm);
	spu->ibox_callback = spufs_ibox_callback;
	spu->wbox_callback = spufs_wbox_callback;
	spu->stop_callback = spufs_stop_callback;
	spu->mfc_callback = spufs_mfc_callback;
	spu->dma_callback = spufs_dma_callback;
	mb();
	spu_unmap_mappings(ctx);
	spu_restore(&ctx->csa, spu);
	spu->timestamp = jiffies;
	spu_cpu_affinity_set(spu, raw_smp_processor_id());
	spu_switch_notify(spu, ctx);
	spu_add_to_active_list(spu);
	ctx->state = SPU_STATE_RUNNABLE;
}

/**
 * spu_unbind_context - unbind spu context from physical spu
 * @spu: physical spu to unbind from
 * @ctx: context to unbind
 */
static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
		 spu->pid, spu->number, spu->node);
	spu_remove_from_active_list(spu);
	spu_switch_notify(spu, NULL);
	spu_unmap_mappings(ctx);
	spu_save(&ctx->csa, spu);
	spu->timestamp = jiffies;
	ctx->state = SPU_STATE_SAVED;
	spu->ibox_callback = NULL;
	spu->wbox_callback = NULL;
	spu->stop_callback = NULL;
	spu->mfc_callback = NULL;
	spu->dma_callback = NULL;
	spu->mm = NULL;
	spu->pid = 0;
	ctx->ops = &spu_backing_ops;
	ctx->spu = NULL;
	spu->flags = 0;
	spu->ctx = NULL;
}

/**
 * spu_add_to_rq - add a context to the runqueue
 * @ctx: context to add
 */
static void spu_add_to_rq(struct spu_context *ctx)
{
	spin_lock(&spu_prio->runq_lock);
	list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
	set_bit(ctx->prio, spu_prio->bitmap);
	spin_unlock(&spu_prio->runq_lock);
}

/**
 * spu_del_from_rq - remove a context from the runqueue
 * @ctx: context to remove
 */
static void spu_del_from_rq(struct spu_context *ctx)
{
	spin_lock(&spu_prio->runq_lock);
	list_del_init(&ctx->rq);
	if (list_empty(&spu_prio->runq[ctx->prio]))
		clear_bit(ctx->prio, spu_prio->bitmap);
	spin_unlock(&spu_prio->runq_lock);
}

/**
 * spu_grab_context - find a runnable context on the runqueue
 * @prio: priority of the context to look for
 *
 * This function returns the first context queued at priority @prio.
 * If there is more than one context with the given priority, the one
 * that has been waiting longest is taken.  The caller is responsible
 * for removing it from the runqueue.
 *
 * Returns the spu_context found, or NULL if the runqueue is empty
 * for @prio.
 *
 * Must be called with spu_prio->runq_lock held.
 */
static struct spu_context *spu_grab_context(int prio)
{
	struct list_head *rq = &spu_prio->runq[prio];

	if (list_empty(rq))
		return NULL;
	return list_entry(rq->next, struct spu_context, rq);
}
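
/**
 * spu_prio_wait - sleep until a spu is available for this context
 * @ctx: spu context to wait for
 *
 * Sleep on ctx->stop_wq until spu_reschedule wakes us up or a signal
 * is pending.  ctx->state_mutex is dropped while sleeping and
 * reacquired before returning.
 */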
static void spu_prio_wait(struct spu_context *ctx)
{
	DEFINE_WAIT(wait);

	set_bit(SPU_SCHED_WAKE, &ctx->sched_flags);
	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
	if (!signal_pending(current)) {
		mutex_unlock(&ctx->state_mutex);
		schedule();
		mutex_lock(&ctx->state_mutex);
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&ctx->stop_wq, &wait);
	clear_bit(SPU_SCHED_WAKE, &ctx->sched_flags);
}

/**
 * spu_reschedule - try to find a runnable context for a spu
 * @spu: spu available
 *
 * This function is called whenever a spu becomes idle.  It returns the
 * spu to the free pool and wakes the highest priority context waiting
 * for an spu, if any, so it can be scheduled for execution.
 */
static void spu_reschedule(struct spu *spu)
{
	int best;

	spu_free(spu);

	spin_lock(&spu_prio->runq_lock);
	best = sched_find_first_bit(spu_prio->bitmap);
	if (best < MAX_PRIO) {
		struct spu_context *ctx = spu_grab_context(best);
		if (ctx && test_bit(SPU_SCHED_WAKE, &ctx->sched_flags))
			wake_up(&ctx->stop_wq);
	}
	spin_unlock(&spu_prio->runq_lock);
}
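
/**
 * spu_get_idle - try to allocate an idle spu
 * @ctx: spu context for which an spu is wanted
 *
 * Walk the nodes starting at the local one and return the first idle
 * spu found on a node the current task is allowed to run on, or NULL
 * if none is free.
 */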
static struct spu *spu_get_idle(struct spu_context *ctx)
{
	struct spu *spu = NULL;
	int node = cpu_to_node(raw_smp_processor_id());
	int n;

	for (n = 0; n < MAX_NUMNODES; n++, node++) {
		node = (node < MAX_NUMNODES) ? node : 0;
		if (!node_allowed(node))
			continue;
		spu = spu_alloc_node(node);
		if (spu)
			break;
	}
	return spu;
}

/**
 * find_victim - find a lower priority context to preempt
 * @ctx: candidate context for running
 *
 * Returns the freed physical spu to run the new context on.
 */
static struct spu *find_victim(struct spu_context *ctx)
{
	struct spu_context *victim = NULL;
	struct spu *spu;
	int node, n;

	/*
	 * Look for a possible preemption candidate on the local node first.
	 * If there is no candidate look at the other nodes.  This isn't
	 * exactly fair, but so far the whole spu scheduler tries to keep
	 * a strong node affinity.  We might want to fine-tune this in
	 * the future.
	 */
 restart:
	node = cpu_to_node(raw_smp_processor_id());
	for (n = 0; n < MAX_NUMNODES; n++, node++) {
		node = (node < MAX_NUMNODES) ? node : 0;
		if (!node_allowed(node))
			continue;

		mutex_lock(&spu_prio->active_mutex[node]);
		list_for_each_entry(spu, &spu_prio->active_list[node], list) {
			struct spu_context *tmp = spu->ctx;

			if (tmp->rt_priority < ctx->rt_priority &&
			    (!victim || tmp->rt_priority < victim->rt_priority))
				victim = spu->ctx;
		}
		mutex_unlock(&spu_prio->active_mutex[node]);

		if (victim) {
			/*
			 * This nests ctx->state_mutex, but we always lock
			 * higher priority contexts before lower priority
			 * ones, so this is safe until we introduce
			 * priority inheritance schemes.
			 */
			if (!mutex_trylock(&victim->state_mutex)) {
				victim = NULL;
				goto restart;
			}

			spu = victim->spu;
			if (!spu) {
				/*
				 * This race can happen because we've dropped
				 * the active list mutex.  Not a problem, just
				 * restart the search.
				 */
				mutex_unlock(&victim->state_mutex);
				victim = NULL;
				goto restart;
			}
			spu_unbind_context(spu, victim);
			mutex_unlock(&victim->state_mutex);
			return spu;
		}
	}

	return NULL;
}

/**
 * spu_activate - find a free spu for a context and execute it
 * @ctx: spu context to schedule
 * @flags: flags (currently ignored)
 *
 * Tries to find a free spu to run @ctx.  If no free spu is available
 * add the context to the runqueue so it gets woken up once an spu
 * is available.
 */
int spu_activate(struct spu_context *ctx, unsigned long flags)
{
	if (ctx->spu)
		return 0;

	do {
		struct spu *spu;

		spu = spu_get_idle(ctx);
		/*
		 * If this is a realtime thread we try to get it running by
		 * preempting a lower priority thread.
		 */
		if (!spu && ctx->rt_priority)
			spu = find_victim(ctx);
		if (spu) {
			spu_bind_context(spu, ctx);
			return 0;
		}

		spu_add_to_rq(ctx);
		if (!(flags & SPU_ACTIVATE_NOWAKE))
			spu_prio_wait(ctx);
		spu_del_from_rq(ctx);
	} while (!signal_pending(current));

	return -ERESTARTSYS;
}

/**
 * spu_deactivate - unbind a context from its physical spu
 * @ctx: spu context to unbind
 *
 * Unbind @ctx from the physical spu it is running on and schedule
 * the highest priority context to run on the freed physical spu.
 */
void spu_deactivate(struct spu_context *ctx)
{
	struct spu *spu = ctx->spu;

	if (spu) {
		spu_unbind_context(spu, ctx);
		spu_reschedule(spu);
	}
}

/**
 * spu_yield - yield a physical spu if others are waiting
 * @ctx: spu context to yield
 *
 * Check if there is a higher priority context waiting and if yes
 * unbind @ctx from the physical spu and schedule the highest
 * priority context to run on the freed physical spu instead.
 */
void spu_yield(struct spu_context *ctx)
{
	struct spu *spu;
	int need_yield = 0;

	if (mutex_trylock(&ctx->state_mutex)) {
		if ((spu = ctx->spu) != NULL) {
			int best = sched_find_first_bit(spu_prio->bitmap);
			if (best < MAX_PRIO) {
				pr_debug("%s: yielding SPU %d NODE %d\n",
					 __FUNCTION__, spu->number, spu->node);
				spu_deactivate(ctx);
				need_yield = 1;
			}
		}
		mutex_unlock(&ctx->state_mutex);
	}
	if (unlikely(need_yield))
		yield();
}
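
/**
 * spu_sched_init - initialize the spu scheduler
 *
 * Allocate the global priority array with one runqueue per priority
 * level and one active list plus mutex per node.  Returns 0 on success
 * or 1 if the allocation fails.
 */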
int __init spu_sched_init(void)
{
	int i;

	spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
	if (!spu_prio) {
		printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
		       __FUNCTION__);
		return 1;
	}
	for (i = 0; i < MAX_PRIO; i++) {
		INIT_LIST_HEAD(&spu_prio->runq[i]);
		__clear_bit(i, spu_prio->bitmap);
	}
	__set_bit(MAX_PRIO, spu_prio->bitmap);
	for (i = 0; i < MAX_NUMNODES; i++) {
		mutex_init(&spu_prio->active_mutex[i]);
		INIT_LIST_HEAD(&spu_prio->active_list[i]);
	}
	spin_lock_init(&spu_prio->runq_lock);
	return 0;
}
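
/**
 * spu_sched_exit - tear down the spu scheduler
 *
 * Return any spus left on the active lists to the free pool and free
 * the priority array.
 */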
void __exit spu_sched_exit(void)
{
	struct spu *spu, *tmp;
	int node;

	for (node = 0; node < MAX_NUMNODES; node++) {
		mutex_lock(&spu_prio->active_mutex[node]);
		list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
					 list) {
			list_del_init(&spu->list);
			spu_free(spu);
		}
		mutex_unlock(&spu_prio->active_mutex[node]);
	}
	kfree(spu_prio);
}