/*
 * arch/ia64/kernel/domain.c
 * Architecture specific sched-domains builder.
 *
 * Copyright (C) 2004 Jesse Barnes
 * Copyright (C) 2004 Silicon Graphics, Inc.
 */

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/nodemask.h>
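
/* Maximum number of nodes a node-level sched domain will span
   (see sched_domain_node_span() below) */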
#define SD_NODES_PER_DOMAIN 6

#ifdef CONFIG_NUMA
/**
 * find_next_best_node - find the next node to include in a sched_domain
 * @node: node whose sched_domain we're building
 * @used_nodes: nodes already in the sched_domain
 *
 * Find the next node to include in a given scheduling domain.  Simply
 * finds the closest node not already in the @used_nodes map.
 *
 * Should use nodemask_t.
 */
static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
{
        int i, n, val, min_val, best_node = 0;

        min_val = INT_MAX;

        for (i = 0; i < MAX_NUMNODES; i++) {
                /* Start at @node */
                n = (node + i) % MAX_NUMNODES;

                if (!nr_cpus_node(n))
                        continue;

                /* Skip already used nodes */
                if (test_bit(n, used_nodes))
                        continue;

                /* Simple min distance search */
                val = node_distance(node, n);

                if (val < min_val) {
                        min_val = val;
                        best_node = n;
                }
        }

        set_bit(best_node, used_nodes);
        return best_node;
}

/**
 * sched_domain_node_span - get a cpumask for a node's sched_domain
 * @node: node whose cpumask we're constructing
 *
 * Given a node, construct a good cpumask for its sched_domain to span.  It
 * should be one that prevents unnecessary balancing, but also spreads tasks
 * out optimally.
 */
static cpumask_t __devinit sched_domain_node_span(int node)
{
        int i;
        cpumask_t span, nodemask;
        DECLARE_BITMAP(used_nodes, MAX_NUMNODES);

        cpus_clear(span);
        bitmap_zero(used_nodes, MAX_NUMNODES);

        nodemask = node_to_cpumask(node);
        cpus_or(span, span, nodemask);
        set_bit(node, used_nodes);

        for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
                int next_node = find_next_best_node(node, used_nodes);

                nodemask = node_to_cpumask(next_node);
                cpus_or(span, span, nodemask);
        }

        return span;
}
#endif

/*
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 * can switch it on easily if needed.
 */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
static int __devinit cpu_to_cpu_group(int cpu)
{
        return cpu;
}
#endif

static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
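/*
 * Map a cpu to its physical-package group.  With SMT enabled, all siblings
 * of a core share one group, keyed by the first cpu in the sibling map.
 */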
static int __devinit cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
        return first_cpu(cpu_sibling_map[cpu]);
#else
        return cpu;
#endif
}

#ifdef CONFIG_NUMA
/*
 * init_sched_build_groups() can't handle what we want to do with node
 * groups, so roll our own.  Each node gets its own dynamically allocated
 * list of groups.
 */
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group *sched_group_nodes[MAX_NUMNODES];

static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group sched_group_allnodes[MAX_NUMNODES];

static int __devinit cpu_to_allnodes_group(int cpu)
{
        return cpu_to_node(cpu);
}
#endif
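
/*
 * Domain hierarchy built below, parent to child: allnodes (every
 * non-isolated cpu, only on sufficiently large machines) -> node (up to
 * SD_NODES_PER_DOMAIN nearby nodes) -> phys (one node) -> cpu (SMT
 * siblings, when CONFIG_SCHED_SMT is enabled).
 */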

/*
 * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
 */
void __devinit arch_init_sched_domains(void)
{
        int i;
        cpumask_t cpu_default_map;

        /*
         * Set up the mask of cpus without special-case scheduling
         * requirements.  For now this just excludes isolated cpus, but it
         * could be used to exclude other special cases in the future.
         */
        cpus_complement(cpu_default_map, cpu_isolated_map);
        cpus_and(cpu_default_map, cpu_default_map, cpu_online_map);

        /*
         * Set up domains.  Isolated cpus just stay on the dummy domain.
         */
        for_each_cpu_mask(i, cpu_default_map) {
                int group;
                struct sched_domain *sd = NULL, *p;
                cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));

                cpus_and(nodemask, nodemask, cpu_default_map);

#ifdef CONFIG_NUMA
                if (num_online_cpus()
                                > SD_NODES_PER_DOMAIN * cpus_weight(nodemask)) {
                        sd = &per_cpu(allnodes_domains, i);
                        *sd = SD_ALLNODES_INIT;
                        sd->span = cpu_default_map;
                        group = cpu_to_allnodes_group(i);
                        sd->groups = &sched_group_allnodes[group];
                        p = sd;
                } else
                        p = NULL;

                sd = &per_cpu(node_domains, i);
                *sd = SD_NODE_INIT;
                sd->span = sched_domain_node_span(cpu_to_node(i));
                sd->parent = p;
                cpus_and(sd->span, sd->span, cpu_default_map);
#endif
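
                /* Physical domain: spans the cpus of this cpu's node */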
                p = sd;
                sd = &per_cpu(phys_domains, i);
                group = cpu_to_phys_group(i);
                *sd = SD_CPU_INIT;
                sd->span = nodemask;
                sd->parent = p;
                sd->groups = &sched_group_phys[group];

#ifdef CONFIG_SCHED_SMT
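                /* SMT domain: spans this cpu's hyperthread siblings */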
                p = sd;
                sd = &per_cpu(cpu_domains, i);
                group = cpu_to_cpu_group(i);
                *sd = SD_SIBLING_INIT;
                sd->span = cpu_sibling_map[i];
                cpus_and(sd->span, sd->span, cpu_default_map);
                sd->parent = p;
                sd->groups = &sched_group_cpus[group];
#endif
        }

#ifdef CONFIG_SCHED_SMT
        /* Set up CPU (sibling) groups */
        for_each_cpu_mask(i, cpu_default_map) {
                cpumask_t this_sibling_map = cpu_sibling_map[i];

                cpus_and(this_sibling_map, this_sibling_map, cpu_default_map);
                if (i != first_cpu(this_sibling_map))
                        continue;

                init_sched_build_groups(sched_group_cpus, this_sibling_map,
                                        &cpu_to_cpu_group);
        }
#endif

        /* Set up physical groups */
        for (i = 0; i < MAX_NUMNODES; i++) {
                cpumask_t nodemask = node_to_cpumask(i);

                cpus_and(nodemask, nodemask, cpu_default_map);
                if (cpus_empty(nodemask))
                        continue;

                init_sched_build_groups(sched_group_phys, nodemask,
                                        &cpu_to_phys_group);
        }

#ifdef CONFIG_NUMA
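        /*
         * Allnodes groups: one group per node, together covering every cpu
         * in the default map.
         */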
        init_sched_build_groups(sched_group_allnodes, cpu_default_map,
                                &cpu_to_allnodes_group);

        for (i = 0; i < MAX_NUMNODES; i++) {
                /* Set up node groups */
                struct sched_group *sg, *prev;
                cpumask_t nodemask = node_to_cpumask(i);
                cpumask_t domainspan;
                cpumask_t covered = CPU_MASK_NONE;
                int j;

                cpus_and(nodemask, nodemask, cpu_default_map);
                if (cpus_empty(nodemask))
                        continue;

                domainspan = sched_domain_node_span(i);
                cpus_and(domainspan, domainspan, cpu_default_map);

                sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
                sched_group_nodes[i] = sg;
                for_each_cpu_mask(j, nodemask) {
                        struct sched_domain *sd;

                        sd = &per_cpu(node_domains, j);
                        sd->groups = sg;
                        if (sd->groups == NULL) {
                                /* Turn off balancing if we have no groups */
                                sd->flags = 0;
                        }
                }
                if (!sg) {
                        printk(KERN_WARNING
                               "Can not alloc domain group for node %d\n", i);
                        continue;
                }
                sg->cpu_power = 0;
                sg->cpumask = nodemask;
                cpus_or(covered, covered, nodemask);
                prev = sg;
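
                /*
                 * Chain in one more group for each of the other nodes
                 * covered by this domain's span.
                 */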
                for (j = 0; j < MAX_NUMNODES; j++) {
                        cpumask_t tmp, notcovered;
                        int n = (i + j) % MAX_NUMNODES;

                        cpus_complement(notcovered, covered);
                        cpus_and(tmp, notcovered, cpu_default_map);
                        cpus_and(tmp, tmp, domainspan);
                        if (cpus_empty(tmp))
                                break;

                        nodemask = node_to_cpumask(n);
                        cpus_and(tmp, tmp, nodemask);
                        if (cpus_empty(tmp))
                                continue;

                        sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
                        if (!sg) {
                                printk(KERN_WARNING
                                       "Can not alloc domain group for node %d\n", n);
                                break;
                        }
                        sg->cpu_power = 0;
                        sg->cpumask = tmp;
                        cpus_or(covered, covered, tmp);
                        prev->next = sg;
                        prev = sg;
                }
                prev->next = sched_group_nodes[i];
        }
#endif

        /* Calculate CPU power for physical packages and nodes */
        for_each_cpu_mask(i, cpu_default_map) {
                int power;
                struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
                sd = &per_cpu(cpu_domains, i);
                power = SCHED_LOAD_SCALE;
                sd->groups->cpu_power = power;
#endif
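
                /*
                 * Group power scales with group size: each cpu beyond the
                 * first adds another 10% of SCHED_LOAD_SCALE.
                 */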
                sd = &per_cpu(phys_domains, i);
                power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
                        (cpus_weight(sd->groups->cpumask) - 1) / 10;
                sd->groups->cpu_power = power;

#ifdef CONFIG_NUMA
                sd = &per_cpu(allnodes_domains, i);
                if (sd->groups) {
                        power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
                                (cpus_weight(sd->groups->cpumask) - 1) / 10;
                        sd->groups->cpu_power = power;
                }
#endif
        }

#ifdef CONFIG_NUMA
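        /*
         * A node-level group's power is the sum of the power of the
         * physical groups it contains, added once per group.
         */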
        for (i = 0; i < MAX_NUMNODES; i++) {
                struct sched_group *sg = sched_group_nodes[i];
                int j;

                if (sg == NULL)
                        continue;
next_sg:
                for_each_cpu_mask(j, sg->cpumask) {
                        struct sched_domain *sd;
                        int power;

                        sd = &per_cpu(phys_domains, j);
                        if (j != first_cpu(sd->groups->cpumask)) {
                                /*
                                 * Only add "power" once for each
                                 * physical package.
                                 */
                                continue;
                        }
                        power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
                                (cpus_weight(sd->groups->cpumask) - 1) / 10;

                        sg->cpu_power += power;
                }
                sg = sg->next;
                if (sg != sched_group_nodes[i])
                        goto next_sg;
        }
#endif

        /* Attach the domains */
        for_each_online_cpu(i) {
                struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
                sd = &per_cpu(cpu_domains, i);
#else
                sd = &per_cpu(phys_domains, i);
#endif
                cpu_attach_domain(sd, i);
        }
}

void __devinit arch_destroy_sched_domains(void)
{
#ifdef CONFIG_NUMA
        int i;

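        /*
         * Walk each node's circular list of groups, freeing every group
         * exactly once (the list head is freed last).
         */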
        for (i = 0; i < MAX_NUMNODES; i++) {
                struct sched_group *oldsg, *sg = sched_group_nodes[i];

                if (sg == NULL)
                        continue;
                sg = sg->next;
next_sg:
                oldsg = sg;
                sg = sg->next;
                kfree(oldsg);
                if (oldsg != sched_group_nodes[i])
                        goto next_sg;
                sched_group_nodes[i] = NULL;
        }
#endif
}