/* sched.c - SPU scheduler.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * 2006-03-31	NUMA domains added.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/numa.h>
#include <linux/mutex.h>
#include <linux/notifier.h>

#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include <asm/spu_priv1.h>
#include "spufs.h"

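/*
 * Length of a SCHED_RR time slice: HZ jiffies, i.e. one second of
 * wall-clock time between scheduler ticks for a running context.
 */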
#define SPU_TIMESLICE	(HZ)

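/*
 * Central scheduler state: one runqueue list per priority level plus a
 * bitmap of non-empty levels for a fast lookup of the best priority, and
 * a per-NUMA-node list of SPUs that currently have a context bound.
 */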
struct spu_prio_array {
        DECLARE_BITMAP(bitmap, MAX_PRIO);
        struct list_head runq[MAX_PRIO];
        spinlock_t runq_lock;
        struct list_head active_list[MAX_NUMNODES];
        struct mutex active_mutex[MAX_NUMNODES];
};

static struct spu_prio_array *spu_prio;
static struct workqueue_struct *spu_sched_wq;

static inline int node_allowed(int node)
{
        cpumask_t mask;

        if (!nr_cpus_node(node))
                return 0;
        mask = node_to_cpumask(node);
        if (!cpus_intersects(mask, current->cpus_allowed))
                return 0;
        return 1;
}

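/*
 * SCHED_RR contexts are time-sliced: spu_start_tick() arms a delayed work
 * item that fires every SPU_TIMESLICE jiffies and runs spu_sched_tick(),
 * which yields the SPU if a context of the same or better priority is
 * waiting on the runqueue and otherwise re-arms itself.
 */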
void spu_start_tick(struct spu_context *ctx)
{
        if (ctx->policy == SCHED_RR) {
                /*
                 * Make sure the exiting bit is cleared.
                 */
                clear_bit(SPU_SCHED_EXITING, &ctx->sched_flags);
                mb();
                queue_delayed_work(spu_sched_wq, &ctx->sched_work, SPU_TIMESLICE);
        }
}

void spu_stop_tick(struct spu_context *ctx)
{
        if (ctx->policy == SCHED_RR) {
                /*
                 * While the work can normally rearm itself, setting this
                 * flag makes sure it does not rearm itself anymore.
                 */
                set_bit(SPU_SCHED_EXITING, &ctx->sched_flags);
                mb();
                cancel_delayed_work(&ctx->sched_work);
        }
}

/**
 * spu_add_to_active_list - add spu to active list
 * @spu:	spu to add to the active list
 */
static void spu_add_to_active_list(struct spu *spu)
{
        mutex_lock(&spu_prio->active_mutex[spu->node]);
        list_add_tail(&spu->list, &spu_prio->active_list[spu->node]);
        mutex_unlock(&spu_prio->active_mutex[spu->node]);
}

/**
 * spu_remove_from_active_list - remove spu from active list
 * @spu:	spu to remove from the active list
 */
static void spu_remove_from_active_list(struct spu *spu)
{
        int node = spu->node;

        mutex_lock(&spu_prio->active_mutex[node]);
        list_del_init(&spu->list);
        mutex_unlock(&spu_prio->active_mutex[node]);
}

static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);

static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
{
        blocking_notifier_call_chain(&spu_switch_notifier,
                                     ctx ? ctx->object_id : 0, spu);
}

int spu_switch_event_register(struct notifier_block * n)
{
        return blocking_notifier_chain_register(&spu_switch_notifier, n);
}

int spu_switch_event_unregister(struct notifier_block * n)
{
        return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
}

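/*
 * Illustrative only (not part of the original file, kept out of the build
 * with #if 0): a hedged sketch of how a subscriber such as a profiler
 * might use the switch-event interface above.  The notifier chain is
 * invoked with the context's object_id (or 0 on unbind) as the action
 * argument and the struct spu as the data pointer; the callback name and
 * its pr_debug message below are made up for the example.
 */
#if 0
static int example_spu_switch_notify(struct notifier_block *nb,
                                     unsigned long object_id, void *data)
{
        struct spu *spu = data;

        pr_debug("spu %d now runs context object id %#lx\n",
                 spu->number, object_id);
        return NOTIFY_OK;
}

static struct notifier_block example_spu_switch_nb = {
        .notifier_call = example_spu_switch_notify,
};

/* To subscribe:   spu_switch_event_register(&example_spu_switch_nb);   */
/* To unsubscribe: spu_switch_event_unregister(&example_spu_switch_nb); */
#endif
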
/**
 * spu_bind_context - bind spu context to physical spu
 * @spu:	physical spu to bind to
 * @ctx:	context to bind
 */
static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
{
        pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
                 spu->number, spu->node);
        spu->ctx = ctx;
        spu->flags = 0;
        ctx->spu = spu;
        ctx->ops = &spu_hw_ops;
        spu->pid = current->pid;
        spu_associate_mm(spu, ctx->owner);
        spu->ibox_callback = spufs_ibox_callback;
        spu->wbox_callback = spufs_wbox_callback;
        spu->stop_callback = spufs_stop_callback;
        spu->mfc_callback = spufs_mfc_callback;
        spu->dma_callback = spufs_dma_callback;
        mb();
        spu_unmap_mappings(ctx);
        spu_restore(&ctx->csa, spu);
        spu->timestamp = jiffies;
        spu_cpu_affinity_set(spu, raw_smp_processor_id());
        spu_switch_notify(spu, ctx);
        spu_add_to_active_list(spu);
        ctx->state = SPU_STATE_RUNNABLE;
}

/**
 * spu_unbind_context - unbind spu context from physical spu
 * @spu:	physical spu to unbind from
 * @ctx:	context to unbind
 */
static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
{
        pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
                 spu->pid, spu->number, spu->node);
        spu_remove_from_active_list(spu);
        spu_switch_notify(spu, NULL);
        spu_unmap_mappings(ctx);
        spu_save(&ctx->csa, spu);
        spu->timestamp = jiffies;
        ctx->state = SPU_STATE_SAVED;
        spu->ibox_callback = NULL;
        spu->wbox_callback = NULL;
        spu->stop_callback = NULL;
        spu->mfc_callback = NULL;
        spu->dma_callback = NULL;
        spu_associate_mm(spu, NULL);
        spu->pid = 0;
        ctx->ops = &spu_backing_ops;
        ctx->spu = NULL;
        spu->flags = 0;
        spu->ctx = NULL;
}

/**
 * __spu_add_to_rq - add a context to the runqueue
 * @ctx:	context to add
 */
static void __spu_add_to_rq(struct spu_context *ctx)
{
        int prio = ctx->prio;

        list_add_tail(&ctx->rq, &spu_prio->runq[prio]);
        set_bit(prio, spu_prio->bitmap);
}

static void __spu_del_from_rq(struct spu_context *ctx)
{
        int prio = ctx->prio;

        if (!list_empty(&ctx->rq))
                list_del_init(&ctx->rq);
        if (list_empty(&spu_prio->runq[prio]))
                clear_bit(prio, spu_prio->bitmap);
}

static void spu_prio_wait(struct spu_context *ctx)
{
        DEFINE_WAIT(wait);

        spin_lock(&spu_prio->runq_lock);
        prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
        if (!signal_pending(current)) {
                __spu_add_to_rq(ctx);
                spin_unlock(&spu_prio->runq_lock);
                mutex_unlock(&ctx->state_mutex);
                schedule();
                mutex_lock(&ctx->state_mutex);
                spin_lock(&spu_prio->runq_lock);
                __spu_del_from_rq(ctx);
        }
        spin_unlock(&spu_prio->runq_lock);
        __set_current_state(TASK_RUNNING);
        remove_wait_queue(&ctx->stop_wq, &wait);
}

/**
 * spu_get_idle - try to allocate an idle spu
 * @ctx:	spu context to schedule
 *
 * Walks the NUMA nodes starting at the local one and tries to allocate an
 * idle spu from each node the current task is allowed to run on.  Returns
 * the allocated spu, or %NULL if none is idle.
 */
static struct spu *spu_get_idle(struct spu_context *ctx)
{
        struct spu *spu = NULL;
        int node = cpu_to_node(raw_smp_processor_id());
        int n;

        for (n = 0; n < MAX_NUMNODES; n++, node++) {
                node = (node < MAX_NUMNODES) ? node : 0;
                if (!node_allowed(node))
                        continue;
                spu = spu_alloc_node(node);
                if (spu)
                        break;
        }
        return spu;
}

/**
 * find_victim - find a lower priority context to preempt
 * @ctx:	candidate context for running
 *
 * Returns the freed physical spu to run the new context on.
 */
static struct spu *find_victim(struct spu_context *ctx)
{
        struct spu_context *victim = NULL;
        struct spu *spu;
        int node, n;

        /*
         * Look for a possible preemption candidate on the local node first.
         * If there is no candidate look at the other nodes.  This isn't
         * exactly fair, but so far the whole spu scheduler tries to keep
         * a strong node affinity.  We might want to fine-tune this in
         * the future.
         */
restart:
        node = cpu_to_node(raw_smp_processor_id());
        for (n = 0; n < MAX_NUMNODES; n++, node++) {
                node = (node < MAX_NUMNODES) ? node : 0;
                if (!node_allowed(node))
                        continue;

                mutex_lock(&spu_prio->active_mutex[node]);
                list_for_each_entry(spu, &spu_prio->active_list[node], list) {
                        struct spu_context *tmp = spu->ctx;

                        if (tmp->rt_priority < ctx->rt_priority &&
                            (!victim || tmp->rt_priority < victim->rt_priority))
                                victim = spu->ctx;
                }
                mutex_unlock(&spu_prio->active_mutex[node]);

                if (victim) {
                        /*
                         * This nests ctx->state_mutex, but we always lock
                         * higher priority contexts before lower priority
                         * ones, so this is safe until we introduce
                         * priority inheritance schemes.
                         */
                        if (!mutex_trylock(&victim->state_mutex)) {
                                victim = NULL;
                                goto restart;
                        }

                        spu = victim->spu;
                        if (!spu) {
                                /*
                                 * This race can happen because we've dropped
                                 * the active list mutex.  Not a problem, just
                                 * restart the search.
                                 */
                                mutex_unlock(&victim->state_mutex);
                                victim = NULL;
                                goto restart;
                        }
                        spu_unbind_context(spu, victim);
                        mutex_unlock(&victim->state_mutex);
                        /*
                         * We need to break out of the wait loop in spu_run
                         * manually to ensure this context gets put on the
                         * runqueue again ASAP.
                         */
                        wake_up(&victim->stop_wq);
                        return spu;
                }
        }

        return NULL;
}

/**
 * spu_activate - find a free spu for a context and execute it
 * @ctx:	spu context to schedule
 * @flags:	flags (currently ignored)
 *
 * Tries to find a free spu to run @ctx.  If no free spu is available
 * add the context to the runqueue so it gets woken up once an spu
 * is available.
 */
int spu_activate(struct spu_context *ctx, unsigned long flags)
{
        if (ctx->spu)
                return 0;

        do {
                struct spu *spu;

                spu = spu_get_idle(ctx);
                /*
                 * If this is a realtime thread we try to get it running by
                 * preempting a lower priority thread.
                 */
                if (!spu && ctx->rt_priority)
                        spu = find_victim(ctx);
                if (spu) {
                        spu_bind_context(spu, ctx);
                        return 0;
                }

                spu_prio_wait(ctx);
        } while (!signal_pending(current));

        return -ERESTARTSYS;
}

/**
 * grab_runnable_context - try to find a runnable context
 * @prio:	only consider contexts with a priority better (numerically
 *		lower) than this value
 *
 * Remove the highest priority context on the runqueue and return it
 * to the caller.  Returns %NULL if no runnable context was found.
 */
static struct spu_context *grab_runnable_context(int prio)
{
        struct spu_context *ctx = NULL;
        int best;

        spin_lock(&spu_prio->runq_lock);
        best = sched_find_first_bit(spu_prio->bitmap);
        if (best < prio) {
                struct list_head *rq = &spu_prio->runq[best];

                BUG_ON(list_empty(rq));

                ctx = list_entry(rq->next, struct spu_context, rq);
                __spu_del_from_rq(ctx);
        }
        spin_unlock(&spu_prio->runq_lock);

        return ctx;
}

/*
 * Unbind @ctx from its spu if a runnable context with a priority better
 * than @max_prio is waiting (or unconditionally if @force is set), and
 * wake that context up if one was found.  Returns nonzero in that case.
 */
static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
{
        struct spu *spu = ctx->spu;
        struct spu_context *new = NULL;

        if (spu) {
                new = grab_runnable_context(max_prio);
                if (new || force) {
                        spu_unbind_context(spu, ctx);
                        spu_free(spu);
                        if (new)
                                wake_up(&new->stop_wq);
                }
        }

        return new != NULL;
}

/**
 * spu_deactivate - unbind a context from its physical spu
 * @ctx:	spu context to unbind
 *
 * Unbind @ctx from the physical spu it is running on and schedule
 * the highest priority context to run on the freed physical spu.
 */
void spu_deactivate(struct spu_context *ctx)
{
        __spu_deactivate(ctx, 1, MAX_PRIO);
}

/**
 * spu_yield - yield a physical spu if others are waiting
 * @ctx:	spu context to yield
 *
 * Check if there is another context waiting on the runqueue and if yes
 * unbind @ctx from the physical spu and schedule the highest priority
 * waiting context to run on the freed physical spu instead.
 */
void spu_yield(struct spu_context *ctx)
{
        if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
                mutex_lock(&ctx->state_mutex);
                __spu_deactivate(ctx, 0, MAX_PRIO);
                mutex_unlock(&ctx->state_mutex);
        }
}

void spu_sched_tick(struct work_struct *work)
{
        struct spu_context *ctx =
                container_of(work, struct spu_context, sched_work.work);
        int preempted;

        /*
         * If this context is being stopped avoid rescheduling from the
         * scheduler tick because we would block on the state_mutex.
         * The caller will yield the spu later on anyway.
         */
        if (test_bit(SPU_SCHED_EXITING, &ctx->sched_flags))
                return;

        mutex_lock(&ctx->state_mutex);
        preempted = __spu_deactivate(ctx, 0, ctx->prio + 1);
        mutex_unlock(&ctx->state_mutex);

        if (preempted) {
                /*
                 * We need to break out of the wait loop in spu_run manually
                 * to ensure this context gets put on the runqueue again
                 * ASAP.
                 */
                wake_up(&ctx->stop_wq);
        } else {
                spu_start_tick(ctx);
        }
}

int __init spu_sched_init(void)
{
        int i;

        spu_sched_wq = create_singlethread_workqueue("spusched");
        if (!spu_sched_wq)
                return 1;

        spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
        if (!spu_prio) {
                printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
                       __FUNCTION__);
                destroy_workqueue(spu_sched_wq);
                return 1;
        }
        for (i = 0; i < MAX_PRIO; i++) {
                INIT_LIST_HEAD(&spu_prio->runq[i]);
                __clear_bit(i, spu_prio->bitmap);
        }
        __set_bit(MAX_PRIO, spu_prio->bitmap);
        for (i = 0; i < MAX_NUMNODES; i++) {
                mutex_init(&spu_prio->active_mutex[i]);
                INIT_LIST_HEAD(&spu_prio->active_list[i]);
        }
        spin_lock_init(&spu_prio->runq_lock);
        return 0;
}

void __exit spu_sched_exit(void)
{
        struct spu *spu, *tmp;
        int node;

        for (node = 0; node < MAX_NUMNODES; node++) {
                mutex_lock(&spu_prio->active_mutex[node]);
                list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
                                         list) {
                        list_del_init(&spu->list);
                        spu_free(spu);
                }
                mutex_unlock(&spu_prio->active_mutex[node]);
        }
        kfree(spu_prio);
        destroy_workqueue(spu_sched_wq);
}