domain.c

/*
 * arch/ia64/kernel/domain.c
 * Architecture specific sched-domains builder.
 *
 * Copyright (C) 2004 Jesse Barnes
 * Copyright (C) 2004 Silicon Graphics, Inc.
 */

#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/nodemask.h>
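
/*
 * Maximum number of nodes spanned by one node-level sched_domain, and the
 * factor used in build_sched_domains() to decide when an additional
 * "allnodes" domain level is needed.
 */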
#define SD_NODES_PER_DOMAIN 16

#ifdef CONFIG_NUMA
/**
 * find_next_best_node - find the next node to include in a sched_domain
 * @node: node whose sched_domain we're building
 * @used_nodes: nodes already in the sched_domain
 *
 * Find the next node to include in a given scheduling domain.  Simply
 * finds the closest node not already in the @used_nodes map.
 *
 * Should use nodemask_t.
 */
static int find_next_best_node(int node, unsigned long *used_nodes)
{
	int i, n, val, min_val, best_node = 0;

	min_val = INT_MAX;

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Start at @node */
		n = (node + i) % MAX_NUMNODES;

		if (!nr_cpus_node(n))
			continue;

		/* Skip already used nodes */
		if (test_bit(n, used_nodes))
			continue;

		/* Simple min distance search */
		val = node_distance(node, n);

		if (val < min_val) {
			min_val = val;
			best_node = n;
		}
	}

	set_bit(best_node, used_nodes);
	return best_node;
}

/**
 * sched_domain_node_span - get a cpumask for a node's sched_domain
 * @node: node whose cpumask we're constructing
 *
 * Given a node, construct a good cpumask for its sched_domain to span.  It
 * should be one that prevents unnecessary balancing, but also spreads tasks
 * out optimally.
 */
static cpumask_t sched_domain_node_span(int node)
{
	int i;
	cpumask_t span, nodemask;
	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);

	cpus_clear(span);
	bitmap_zero(used_nodes, MAX_NUMNODES);

	nodemask = node_to_cpumask(node);
	cpus_or(span, span, nodemask);
	set_bit(node, used_nodes);

	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
		int next_node = find_next_best_node(node, used_nodes);

		nodemask = node_to_cpumask(next_node);
		cpus_or(span, span, nodemask);
	}

	return span;
}
#endif

/*
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 * can switch it on easily if needed.
 */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];

static int cpu_to_cpu_group(int cpu)
{
	return cpu;
}
#endif

static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
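
/*
 * With SMT enabled, all siblings of a physical CPU map to the group of the
 * first CPU in their sibling map; otherwise every CPU is its own physical
 * group.
 */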
static int cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	return first_cpu(cpu_sibling_map[cpu]);
#else
	return cpu;
#endif
}

#ifdef CONFIG_NUMA
/*
 * init_sched_build_groups() can't handle what we want to do with node
 * groups, so roll our own.  Each node gets its own list of groups, which
 * is allocated dynamically.
 */
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];

static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];

static int cpu_to_allnodes_group(int cpu)
{
	return cpu_to_node(cpu);
}
#endif

/*
 * Build sched domains for a given set of cpus and attach the sched domains
 * to the individual cpus
 */
void build_sched_domains(const cpumask_t *cpu_map)
{
	int i;
#ifdef CONFIG_NUMA
	struct sched_group **sched_group_nodes = NULL;
	struct sched_group *sched_group_allnodes = NULL;

	/*
	 * Allocate the per-node list of sched groups
	 */
	sched_group_nodes = kmalloc(sizeof(struct sched_group *) * MAX_NUMNODES,
				    GFP_ATOMIC);
	if (!sched_group_nodes) {
		printk(KERN_WARNING "Can not alloc sched group node list\n");
		return;
	}
	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
#endif

	/*
	 * Set up domains for cpus specified by the cpu_map.
	 */
	for_each_cpu_mask(i, *cpu_map) {
		int group;
		struct sched_domain *sd = NULL, *p;
		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));

		cpus_and(nodemask, nodemask, *cpu_map);

#ifdef CONFIG_NUMA
		if (cpus_weight(*cpu_map)
				> SD_NODES_PER_DOMAIN * cpus_weight(nodemask)) {
			if (!sched_group_allnodes) {
				sched_group_allnodes
					= kmalloc(sizeof(struct sched_group)
							* MAX_NUMNODES,
						  GFP_KERNEL);
				if (!sched_group_allnodes) {
					printk(KERN_WARNING
					"Can not alloc allnodes sched group\n");
					break;
				}
				sched_group_allnodes_bycpu[i]
						= sched_group_allnodes;
			}
			sd = &per_cpu(allnodes_domains, i);
			*sd = SD_ALLNODES_INIT;
			sd->span = *cpu_map;
			group = cpu_to_allnodes_group(i);
			sd->groups = &sched_group_allnodes[group];
			p = sd;
		} else
			p = NULL;

		sd = &per_cpu(node_domains, i);
		*sd = SD_NODE_INIT;
		sd->span = sched_domain_node_span(cpu_to_node(i));
		sd->parent = p;
		cpus_and(sd->span, sd->span, *cpu_map);
#endif

		p = sd;
		sd = &per_cpu(phys_domains, i);
		group = cpu_to_phys_group(i);
		*sd = SD_CPU_INIT;
		sd->span = nodemask;
		sd->parent = p;
		sd->groups = &sched_group_phys[group];

#ifdef CONFIG_SCHED_SMT
		p = sd;
		sd = &per_cpu(cpu_domains, i);
		group = cpu_to_cpu_group(i);
		*sd = SD_SIBLING_INIT;
		sd->span = cpu_sibling_map[i];
		cpus_and(sd->span, sd->span, *cpu_map);
		sd->parent = p;
		sd->groups = &sched_group_cpus[group];
#endif
	}
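
	/*
	 * At this point every CPU in cpu_map has its domain chain set up,
	 * outermost to innermost: allnodes (only on large NUMA
	 * configurations), node, physical and, with CONFIG_SCHED_SMT,
	 * SMT sibling.  The group lists for each level are built below.
	 */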

#ifdef CONFIG_SCHED_SMT
	/* Set up CPU (sibling) groups */
	for_each_cpu_mask(i, *cpu_map) {
		cpumask_t this_sibling_map = cpu_sibling_map[i];

		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
		if (i != first_cpu(this_sibling_map))
			continue;

		init_sched_build_groups(sched_group_cpus, this_sibling_map,
						&cpu_to_cpu_group);
	}
#endif

	/* Set up physical groups */
	for (i = 0; i < MAX_NUMNODES; i++) {
		cpumask_t nodemask = node_to_cpumask(i);

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask))
			continue;

		init_sched_build_groups(sched_group_phys, nodemask,
						&cpu_to_phys_group);
	}

#ifdef CONFIG_NUMA
	if (sched_group_allnodes)
		init_sched_build_groups(sched_group_allnodes, *cpu_map,
					&cpu_to_allnodes_group);
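
	/*
	 * Per-node groups: each node that has CPUs in cpu_map gets a
	 * circular list of sched_groups covering its domain span.  The
	 * first group holds the node's own CPUs; further groups are added
	 * for the remaining nodes of the span until every CPU in the span
	 * is covered.
	 */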
	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Set up node groups */
		struct sched_group *sg, *prev;
		cpumask_t nodemask = node_to_cpumask(i);
		cpumask_t domainspan;
		cpumask_t covered = CPU_MASK_NONE;
		int j;

		cpus_and(nodemask, nodemask, *cpu_map);
		if (cpus_empty(nodemask)) {
			sched_group_nodes[i] = NULL;
			continue;
		}

		domainspan = sched_domain_node_span(i);
		cpus_and(domainspan, domainspan, *cpu_map);

		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
		sched_group_nodes[i] = sg;
		for_each_cpu_mask(j, nodemask) {
			struct sched_domain *sd;

			sd = &per_cpu(node_domains, j);
			sd->groups = sg;
			if (sd->groups == NULL) {
				/* Turn off balancing if we have no groups */
				sd->flags = 0;
			}
		}
		if (!sg) {
			printk(KERN_WARNING
			       "Can not alloc domain group for node %d\n", i);
			continue;
		}
		sg->cpu_power = 0;
		sg->cpumask = nodemask;
		cpus_or(covered, covered, nodemask);
		prev = sg;

		for (j = 0; j < MAX_NUMNODES; j++) {
			cpumask_t tmp, notcovered;
			int n = (i + j) % MAX_NUMNODES;

			cpus_complement(notcovered, covered);
			cpus_and(tmp, notcovered, *cpu_map);
			cpus_and(tmp, tmp, domainspan);
			if (cpus_empty(tmp))
				break;

			nodemask = node_to_cpumask(n);
			cpus_and(tmp, tmp, nodemask);
			if (cpus_empty(tmp))
				continue;

			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
			if (!sg) {
				printk(KERN_WARNING
				"Can not alloc domain group for node %d\n", j);
				break;
			}
			sg->cpu_power = 0;
			sg->cpumask = tmp;
			cpus_or(covered, covered, tmp);
			prev->next = sg;
			prev = sg;
		}
		prev->next = sched_group_nodes[i];
	}
#endif

	/* Calculate CPU power for physical packages and nodes */
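	/*
	 * The physical and allnodes groups below get SCHED_LOAD_SCALE for
	 * their first CPU plus SCHED_LOAD_SCALE/10 for every additional CPU
	 * in the group, so extra CPUs (e.g. SMT siblings) contribute only a
	 * fraction of a full CPU; SMT sibling groups get exactly
	 * SCHED_LOAD_SCALE.
	 */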
	for_each_cpu_mask(i, *cpu_map) {
		int power;
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
		power = SCHED_LOAD_SCALE;
		sd->groups->cpu_power = power;
#endif

		sd = &per_cpu(phys_domains, i);
		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
		sd->groups->cpu_power = power;

#ifdef CONFIG_NUMA
		sd = &per_cpu(allnodes_domains, i);
		if (sd->groups) {
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;
			sd->groups->cpu_power = power;
		}
#endif
	}

#ifdef CONFIG_NUMA
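	/*
	 * Walk each node's circular group list (restarting at the next_sg
	 * label) and add to every group the power of each physical package
	 * it contains, counting each package exactly once.
	 */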
	for (i = 0; i < MAX_NUMNODES; i++) {
		struct sched_group *sg = sched_group_nodes[i];
		int j;

		if (sg == NULL)
			continue;
next_sg:
		for_each_cpu_mask(j, sg->cpumask) {
			struct sched_domain *sd;
			int power;

			sd = &per_cpu(phys_domains, j);
			if (j != first_cpu(sd->groups->cpumask)) {
				/*
				 * Only add "power" once for each
				 * physical package.
				 */
				continue;
			}
			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
				(cpus_weight(sd->groups->cpumask)-1) / 10;

			sg->cpu_power += power;
		}
		sg = sg->next;
		if (sg != sched_group_nodes[i])
			goto next_sg;
	}
#endif

	/* Attach the domains */
	for_each_cpu_mask(i, *cpu_map) {
		struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
		sd = &per_cpu(cpu_domains, i);
#else
		sd = &per_cpu(phys_domains, i);
#endif
		cpu_attach_domain(sd, i);
	}
}

/*
 * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
 */
void arch_init_sched_domains(const cpumask_t *cpu_map)
{
	cpumask_t cpu_default_map;

	/*
	 * Setup mask for cpus without special case scheduling requirements.
	 * For now this just excludes isolated cpus, but could be used to
	 * exclude other special cases in the future.
	 */
	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);

	build_sched_domains(&cpu_default_map);
}
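
/*
 * Tear down the dynamically allocated node and allnodes group lists built
 * by build_sched_domains() for the cpus in cpu_map.
 */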
void arch_destroy_sched_domains(const cpumask_t *cpu_map)
{
#ifdef CONFIG_NUMA
	int i;
	int cpu;

	for_each_cpu_mask(cpu, *cpu_map) {
		struct sched_group *sched_group_allnodes
			= sched_group_allnodes_bycpu[cpu];
		struct sched_group **sched_group_nodes
			= sched_group_nodes_bycpu[cpu];

		if (sched_group_allnodes) {
			kfree(sched_group_allnodes);
			sched_group_allnodes_bycpu[cpu] = NULL;
		}

		if (!sched_group_nodes)
			continue;
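
		/*
		 * Free each node's circular group list: start with the
		 * entry after the head and keep freeing until the head
		 * itself has been released.
		 */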
		for (i = 0; i < MAX_NUMNODES; i++) {
			cpumask_t nodemask = node_to_cpumask(i);
			struct sched_group *oldsg, *sg = sched_group_nodes[i];

			cpus_and(nodemask, nodemask, *cpu_map);
			if (cpus_empty(nodemask))
				continue;

			if (sg == NULL)
				continue;
			sg = sg->next;
next_sg:
			oldsg = sg;
			sg = sg->next;
			kfree(oldsg);
			if (oldsg != sched_group_nodes[i])
				goto next_sg;
		}
		kfree(sched_group_nodes);
		sched_group_nodes_bycpu[cpu] = NULL;
	}
#endif
}