/*
 * arch/ia64/kernel/domain.c
 * Architecture specific sched-domains builder.
 *
 * Copyright (C) 2004 Jesse Barnes
 * Copyright (C) 2004 Silicon Graphics, Inc.
 */

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/nodemask.h>

#define SD_NODES_PER_DOMAIN 16

#ifdef CONFIG_NUMA
/**
 * find_next_best_node - find the next node to include in a sched_domain
 * @node: node whose sched_domain we're building
 * @used_nodes: nodes already in the sched_domain
 *
 * Find the next node to include in a given scheduling domain.  Simply
 * finds the closest node not already in the @used_nodes map.
 *
 * Should use nodemask_t.
 */
static int find_next_best_node(int node, unsigned long *used_nodes)
{
	int i, n, val, min_val, best_node = 0;

	min_val = INT_MAX;

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Start at @node */
		n = (node + i) % MAX_NUMNODES;

		if (!nr_cpus_node(n))
			continue;

		/* Skip already used nodes */
		if (test_bit(n, used_nodes))
			continue;

		/* Simple min distance search */
		val = node_distance(node, n);

		if (val < min_val) {
			min_val = val;
			best_node = n;
		}
	}

	set_bit(best_node, used_nodes);
	return best_node;
}

/**
 * sched_domain_node_span - get a cpumask for a node's sched_domain
 * @node: node whose cpumask we're constructing
 *
 * Given a node, construct a good cpumask for its sched_domain to span.  It
 * should be one that prevents unnecessary balancing, but also spreads tasks
 * out optimally.
 */
static cpumask_t sched_domain_node_span(int node)
{
	int i;
	cpumask_t span, nodemask;
	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);

	cpus_clear(span);
	bitmap_zero(used_nodes, MAX_NUMNODES);

	nodemask = node_to_cpumask(node);
	cpus_or(span, span, nodemask);
	set_bit(node, used_nodes);

	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
		int next_node = find_next_best_node(node, used_nodes);
		nodemask = node_to_cpumask(next_node);
		cpus_or(span, span, nodemask);
	}

	return span;
}
#endif
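
/*
 * With SD_NODES_PER_DOMAIN == 16, sched_domain_node_span(node) covers the
 * CPUs of @node plus those of its 15 closest remaining nodes as measured
 * by node_distance().  On machines with more than 16 nodes a node-level
 * domain therefore spans only a neighbourhood of the system; the allnodes
 * level set up below covers the rest.
 */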

/*
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 * can switch it on easily if needed.
 */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
static int cpu_to_cpu_group(int cpu)
{
	return cpu;
}
#endif

static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
static int cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	return first_cpu(cpu_sibling_map[cpu]);
#else
	return cpu;
#endif
}

#ifdef CONFIG_NUMA
/*
 * The init_sched_build_groups can't handle what we want to do with node
 * groups, so roll our own.  Now each node has its own list of groups which
 * gets dynamically allocated.
 */
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group *sched_group_nodes[MAX_NUMNODES];

static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group sched_group_allnodes[MAX_NUMNODES];

static int cpu_to_allnodes_group(int cpu)
{
	return cpu_to_node(cpu);
}
#endif
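
/*
 * build_sched_domains() below gives each CPU a hierarchy of domains, from
 * innermost to outermost:
 *
 *	cpu_domains	 - this CPU's SMT siblings (CONFIG_SCHED_SMT only)
 *	phys_domains	 - the CPUs of this CPU's node
 *	node_domains	 - the CPUs of nearby nodes (CONFIG_NUMA only)
 *	allnodes_domains - every CPU in cpu_map (only when there are more
 *			   online CPUs than one node-level span can cover)
 *
 * Each level points at a circular list of sched_groups covering its span;
 * group cpu_power is filled in after the groups are built.
 */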

/*
 * Build sched domains for a given set of cpus and attach the sched domains
 * to the individual cpus
 */
void build_sched_domains(const cpumask_t *cpu_map)
{
	int i;

	/*
	 * Set up domains for cpus specified by the cpu_map.
	 */
	for_each_cpu_mask(i, *cpu_map) {
		int group;
		struct sched_domain *sd = NULL, *p;
		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));

		cpus_and(nodemask, nodemask, *cpu_map);

#ifdef CONFIG_NUMA
		if (num_online_cpus()
				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
			sd = &per_cpu(allnodes_domains, i);
			*sd = SD_ALLNODES_INIT;
			sd->span = *cpu_map;
			group = cpu_to_allnodes_group(i);
			sd->groups = &sched_group_allnodes[group];
			p = sd;
		} else
			p = NULL;

		sd = &per_cpu(node_domains, i);
		*sd = SD_NODE_INIT;
		sd->span = sched_domain_node_span(cpu_to_node(i));
		sd->parent = p;
		cpus_and(sd->span, sd->span, *cpu_map);
#endif

		p = sd;
		sd = &per_cpu(phys_domains, i);
		group = cpu_to_phys_group(i);
		*sd = SD_CPU_INIT;
		sd->span = nodemask;
		sd->parent = p;
		sd->groups = &sched_group_phys[group];

#ifdef CONFIG_SCHED_SMT
		p = sd;
		sd = &per_cpu(cpu_domains, i);
		group = cpu_to_cpu_group(i);
		*sd = SD_SIBLING_INIT;
		sd->span = cpu_sibling_map[i];
		cpus_and(sd->span, sd->span, *cpu_map);
		sd->parent = p;
		sd->groups = &sched_group_cpus[group];
#endif
	}

#ifdef CONFIG_SCHED_SMT
	/* Set up CPU (sibling) groups */
	for_each_cpu_mask(i, *cpu_map) {
		cpumask_t this_sibling_map = cpu_sibling_map[i];
		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
		if (i != first_cpu(this_sibling_map))
			continue;

		init_sched_build_groups(sched_group_cpus, this_sibling_map,
						&cpu_to_cpu_group);
	}
#endif

	/* Set up physical groups */
	for (i = 0; i < MAX_NUMNODES; i++) {
		cpumask_t nodemask = node_to_cpumask(i);

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask))
			continue;

		init_sched_build_groups(sched_group_phys, nodemask,
						&cpu_to_phys_group);
	}

#ifdef CONFIG_NUMA
	init_sched_build_groups(sched_group_allnodes, *cpu_map,
				&cpu_to_allnodes_group);

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Set up node groups */
		struct sched_group *sg, *prev;
		cpumask_t nodemask = node_to_cpumask(i);
		cpumask_t domainspan;
		cpumask_t covered = CPU_MASK_NONE;
		int j;

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask))
			continue;

		domainspan = sched_domain_node_span(i);
		cpus_and(domainspan, domainspan, *cpu_map);

		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
		sched_group_nodes[i] = sg;
		for_each_cpu_mask(j, nodemask) {
			struct sched_domain *sd;
			sd = &per_cpu(node_domains, j);
			sd->groups = sg;
			if (sd->groups == NULL) {
				/* Turn off balancing if we have no groups */
				sd->flags = 0;
			}
		}
		if (!sg) {
			printk(KERN_WARNING
			       "Can not alloc domain group for node %d\n", i);
			continue;
		}
		sg->cpu_power = 0;
		sg->cpumask = nodemask;
		cpus_or(covered, covered, nodemask);
		prev = sg;

		for (j = 0; j < MAX_NUMNODES; j++) {
			cpumask_t tmp, notcovered;
			int n = (i + j) % MAX_NUMNODES;

			cpus_complement(notcovered, covered);
			cpus_and(tmp, notcovered, *cpu_map);
			cpus_and(tmp, tmp, domainspan);
			if (cpus_empty(tmp))
				break;

			nodemask = node_to_cpumask(n);
			cpus_and(tmp, tmp, nodemask);
			if (cpus_empty(tmp))
				continue;

			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
			if (!sg) {
				printk(KERN_WARNING
				       "Can not alloc domain group for node %d\n", j);
				break;
			}
			sg->cpu_power = 0;
			sg->cpumask = tmp;
			cpus_or(covered, covered, tmp);
			prev->next = sg;
			prev = sg;
		}
		prev->next = sched_group_nodes[i];
	}
#endif
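
	/*
	 * Group power below is SCHED_LOAD_SCALE for a single CPU plus
	 * roughly 10% of SCHED_LOAD_SCALE for each additional CPU in the
	 * group:
	 *
	 *	power = SCHED_LOAD_SCALE +
	 *		SCHED_LOAD_SCALE * (weight - 1) / 10;
	 *
	 * so, for example, a 4-CPU physical group gets roughly
	 * 1.3 * SCHED_LOAD_SCALE.  Node-level groups then accumulate the
	 * power of each physical package they contain.
	 */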
	/* Calculate CPU power for physical packages and nodes */
	for_each_cpu_mask(i, *cpu_map) {
		int power;
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
		power = SCHED_LOAD_SCALE;
		sd->groups->cpu_power = power;
#endif

		sd = &per_cpu(phys_domains, i);
		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
		sd->groups->cpu_power = power;

#ifdef CONFIG_NUMA
		sd = &per_cpu(allnodes_domains, i);
		if (sd->groups) {
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
			sd->groups->cpu_power = power;
		}
#endif
	}

#ifdef CONFIG_NUMA
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *sg = sched_group_nodes[i];
		int j;

		if (sg == NULL)
			continue;
next_sg:
		for_each_cpu_mask(j, sg->cpumask) {
			struct sched_domain *sd;
			int power;

			sd = &per_cpu(phys_domains, j);
			if (j != first_cpu(sd->groups->cpumask)) {
				/*
				 * Only add "power" once for each
				 * physical package.
				 */
				continue;
			}
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;

			sg->cpu_power += power;
		}
		sg = sg->next;
		if (sg != sched_group_nodes[i])
			goto next_sg;
	}
#endif

	/* Attach the domains */
	for_each_online_cpu(i) {
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
#else
		sd = &per_cpu(phys_domains, i);
#endif
		cpu_attach_domain(sd, i);
	}
}

/*
 * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
 */
void arch_init_sched_domains(const cpumask_t *cpu_map)
{
	cpumask_t cpu_default_map;

	/*
	 * Setup mask for cpus without special case scheduling requirements.
	 * For now this just excludes isolated cpus, but could be used to
	 * exclude other special cases in the future.
	 */
	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);

	build_sched_domains(&cpu_default_map);
}

void arch_destroy_sched_domains(const cpumask_t *cpu_map)
{
#ifdef CONFIG_NUMA
	int i;

	for (i = 0; i < MAX_NUMNODES; i++) {
		cpumask_t nodemask = node_to_cpumask(i);
		struct sched_group *oldsg, *sg = sched_group_nodes[i];

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask))
			continue;

		if (sg == NULL)
			continue;
		sg = sg->next;
next_sg:
		oldsg = sg;
		sg = sg->next;
		kfree(oldsg);
		if (oldsg != sched_group_nodes[i])
			goto next_sg;
		sched_group_nodes[i] = NULL;
	}
#endif
}