- /*
- * arch/ia64/kernel/domain.c
- * Architecture specific sched-domains builder.
- *
- * Copyright (C) 2004 Jesse Barnes
- * Copyright (C) 2004 Silicon Graphics, Inc.
- */
- #include <linux/sched.h>
- #include <linux/percpu.h>
- #include <linux/slab.h>
- #include <linux/cpumask.h>
- #include <linux/init.h>
- #include <linux/topology.h>
- #include <linux/nodemask.h>
- #define SD_NODES_PER_DOMAIN 16
- #ifdef CONFIG_NUMA
- /**
- * find_next_best_node - find the next node to include in a sched_domain
- * @node: node whose sched_domain we're building
- * @used_nodes: nodes already in the sched_domain
- *
- * Find the next node to include in a given scheduling domain. Simply
- * finds the closest node not already in the @used_nodes map.
- *
- * Should use nodemask_t.
- */
- static int find_next_best_node(int node, unsigned long *used_nodes)
- {
- 	int i, n, val, min_val, best_node = 0;
- 	min_val = INT_MAX;
- 	for (i = 0; i < MAX_NUMNODES; i++) {
- 		/* Start at @node */
- 		n = (node + i) % MAX_NUMNODES;
- 		if (!nr_cpus_node(n))
- 			continue;
- 		/* Skip already used nodes */
- 		if (test_bit(n, used_nodes))
- 			continue;
- 		/* Simple min distance search */
- 		val = node_distance(node, n);
- 		if (val < min_val) {
- 			min_val = val;
- 			best_node = n;
- 		}
- 	}
- 	set_bit(best_node, used_nodes);
- 	return best_node;
- }
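The kerneldoc above notes that this helper should use nodemask_t. As a hedged sketch of that cleanup (not part of this file), the same minimum-distance search could be written against the linux/nodemask.h helpers, assuming node_isset() and node_set() are available as in kernels of this era:

static int find_next_best_node(int node, nodemask_t *used_nodes)
{
	int i, n, val, min_val = INT_MAX, best_node = 0;

	for (i = 0; i < MAX_NUMNODES; i++) {
		/* Start at @node and wrap around */
		n = (node + i) % MAX_NUMNODES;
		/* Skip cpu-less nodes and nodes already in the domain */
		if (!nr_cpus_node(n) || node_isset(n, *used_nodes))
			continue;
		/* Simple min distance search */
		val = node_distance(node, n);
		if (val < min_val) {
			min_val = val;
			best_node = n;
		}
	}
	node_set(best_node, *used_nodes);
	return best_node;
}

Callers would then initialize the mask with nodes_clear() instead of DECLARE_BITMAP()/bitmap_zero().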
- /**
- * sched_domain_node_span - get a cpumask for a node's sched_domain
- * @node: node whose cpumask we're constructing
- *
- * Given a node, construct a good cpumask for its sched_domain to span. It
- * should be one that prevents unnecessary balancing, but also spreads tasks
- * out optimally.
- */
- static cpumask_t sched_domain_node_span(int node)
- {
- 	int i;
- 	cpumask_t span, nodemask;
- 	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
- 	cpus_clear(span);
- 	bitmap_zero(used_nodes, MAX_NUMNODES);
- 	nodemask = node_to_cpumask(node);
- 	cpus_or(span, span, nodemask);
- 	set_bit(node, used_nodes);
- 	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
- 		int next_node = find_next_best_node(node, used_nodes);
- 		nodemask = node_to_cpumask(next_node);
- 		cpus_or(span, span, nodemask);
- 	}
- 	return span;
- }
- #endif
- /*
- * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
- * can switch it on easily if needed.
- */
- #ifdef CONFIG_SCHED_SMT
- static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
- static struct sched_group sched_group_cpus[NR_CPUS];
- static int cpu_to_cpu_group(int cpu)
- {
- 	return cpu;
- }
- #endif
- static DEFINE_PER_CPU(struct sched_domain, phys_domains);
- static struct sched_group sched_group_phys[NR_CPUS];
- static int cpu_to_phys_group(int cpu)
- {
- #ifdef CONFIG_SCHED_SMT
- 	return first_cpu(cpu_sibling_map[cpu]);
- #else
- 	return cpu;
- #endif
- }
- #ifdef CONFIG_NUMA
- /*
- * The init_sched_build_groups can't handle what we want to do with node
- * groups, so roll our own. Now each node has its own list of groups which
- * gets dynamically allocated.
- */
- static DEFINE_PER_CPU(struct sched_domain, node_domains);
- static struct sched_group *sched_group_nodes[MAX_NUMNODES];
- static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
- static struct sched_group sched_group_allnodes[MAX_NUMNODES];
- static int cpu_to_allnodes_group(int cpu)
- {
- 	return cpu_to_node(cpu);
- }
- #endif
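As the comment above says, each node gets its own dynamically allocated list of groups; build_sched_domains() below links them into a ring (the last group's ->next points back at sched_group_nodes[i]). Purely as an illustration of that layout (the helper name is hypothetical and not part of this file), one node's ring can be walked like this:

static void walk_node_groups(int node)
{
	struct sched_group *first = sched_group_nodes[node], *sg = first;

	if (!first)
		return;
	do {
		/* each group covers the cpus of one node within the span */
		printk(KERN_DEBUG "node %d: group spans %d cpus\n",
		       node, cpus_weight(sg->cpumask));
		sg = sg->next;
	} while (sg != first);
}

The goto-based loops later in this file are equivalent to this do/while traversal of the ring.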
- /*
- * Build sched domains for a given set of cpus and attach the sched domains
- * to the individual cpus.
- */
- void build_sched_domains(const cpumask_t *cpu_map)
- {
- 	int i;
- 	/*
- 	 * Set up domains for cpus specified by the cpu_map.
- 	 */
- 	for_each_cpu_mask(i, *cpu_map) {
- 		int group;
- 		struct sched_domain *sd = NULL, *p;
- 		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
- 		cpus_and(nodemask, nodemask, *cpu_map);
- #ifdef CONFIG_NUMA
- 		if (num_online_cpus()
- 				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
- 			sd = &per_cpu(allnodes_domains, i);
- 			*sd = SD_ALLNODES_INIT;
- 			sd->span = *cpu_map;
- 			group = cpu_to_allnodes_group(i);
- 			sd->groups = &sched_group_allnodes[group];
- 			p = sd;
- 		} else
- 			p = NULL;
- 		sd = &per_cpu(node_domains, i);
- 		*sd = SD_NODE_INIT;
- 		sd->span = sched_domain_node_span(cpu_to_node(i));
- 		sd->parent = p;
- 		cpus_and(sd->span, sd->span, *cpu_map);
- #endif
- 		p = sd;
- 		sd = &per_cpu(phys_domains, i);
- 		group = cpu_to_phys_group(i);
- 		*sd = SD_CPU_INIT;
- 		sd->span = nodemask;
- 		sd->parent = p;
- 		sd->groups = &sched_group_phys[group];
- #ifdef CONFIG_SCHED_SMT
- 		p = sd;
- 		sd = &per_cpu(cpu_domains, i);
- 		group = cpu_to_cpu_group(i);
- 		*sd = SD_SIBLING_INIT;
- 		sd->span = cpu_sibling_map[i];
- 		cpus_and(sd->span, sd->span, *cpu_map);
- 		sd->parent = p;
- 		sd->groups = &sched_group_cpus[group];
- #endif
- 	}
- #ifdef CONFIG_SCHED_SMT
- 	/* Set up CPU (sibling) groups */
- 	for_each_cpu_mask(i, *cpu_map) {
- 		cpumask_t this_sibling_map = cpu_sibling_map[i];
- 		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
- 		if (i != first_cpu(this_sibling_map))
- 			continue;
- 		init_sched_build_groups(sched_group_cpus, this_sibling_map,
- 					&cpu_to_cpu_group);
- 	}
- #endif
- 	/* Set up physical groups */
- 	for (i = 0; i < MAX_NUMNODES; i++) {
- 		cpumask_t nodemask = node_to_cpumask(i);
- 		cpus_and(nodemask, nodemask, *cpu_map);
- 		if (cpus_empty(nodemask))
- 			continue;
- 		init_sched_build_groups(sched_group_phys, nodemask,
- 					&cpu_to_phys_group);
- 	}
- #ifdef CONFIG_NUMA
- 	init_sched_build_groups(sched_group_allnodes, *cpu_map,
- 				&cpu_to_allnodes_group);
- 	for (i = 0; i < MAX_NUMNODES; i++) {
- 		/* Set up node groups */
- 		struct sched_group *sg, *prev;
- 		cpumask_t nodemask = node_to_cpumask(i);
- 		cpumask_t domainspan;
- 		cpumask_t covered = CPU_MASK_NONE;
- 		int j;
- 		cpus_and(nodemask, nodemask, *cpu_map);
- 		if (cpus_empty(nodemask))
- 			continue;
- 		domainspan = sched_domain_node_span(i);
- 		cpus_and(domainspan, domainspan, *cpu_map);
- 		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
- 		sched_group_nodes[i] = sg;
- 		for_each_cpu_mask(j, nodemask) {
- 			struct sched_domain *sd;
- 			sd = &per_cpu(node_domains, j);
- 			sd->groups = sg;
- 			if (sd->groups == NULL) {
- 				/* Turn off balancing if we have no groups */
- 				sd->flags = 0;
- 			}
- 		}
- 		if (!sg) {
- 			printk(KERN_WARNING
- 			"Can not alloc domain group for node %d\n", i);
- 			continue;
- 		}
- 		sg->cpu_power = 0;
- 		sg->cpumask = nodemask;
- 		cpus_or(covered, covered, nodemask);
- 		prev = sg;
- 		for (j = 0; j < MAX_NUMNODES; j++) {
- 			cpumask_t tmp, notcovered;
- 			int n = (i + j) % MAX_NUMNODES;
- 			cpus_complement(notcovered, covered);
- 			cpus_and(tmp, notcovered, *cpu_map);
- 			cpus_and(tmp, tmp, domainspan);
- 			if (cpus_empty(tmp))
- 				break;
- 			nodemask = node_to_cpumask(n);
- 			cpus_and(tmp, tmp, nodemask);
- 			if (cpus_empty(tmp))
- 				continue;
- 			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
- 			if (!sg) {
- 				printk(KERN_WARNING
- "Can not alloc domain group for node %d\n", j);
- 				break;
- 			}
- 			sg->cpu_power = 0;
- 			sg->cpumask = tmp;
- 			cpus_or(covered, covered, tmp);
- 			prev->next = sg;
- 			prev = sg;
- 		}
- 		prev->next = sched_group_nodes[i];
- 	}
- #endif
- 	/* Calculate CPU power for physical packages and nodes */
- 	for_each_cpu_mask(i, *cpu_map) {
- 		int power;
- 		struct sched_domain *sd;
- #ifdef CONFIG_SCHED_SMT
- 		sd = &per_cpu(cpu_domains, i);
- 		power = SCHED_LOAD_SCALE;
- 		sd->groups->cpu_power = power;
- #endif
- 		sd = &per_cpu(phys_domains, i);
- 		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
- 			(cpus_weight(sd->groups->cpumask)-1) / 10;
- 		sd->groups->cpu_power = power;
- #ifdef CONFIG_NUMA
- 		sd = &per_cpu(allnodes_domains, i);
- 		if (sd->groups) {
- 			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
- 				(cpus_weight(sd->groups->cpumask)-1) / 10;
- 			sd->groups->cpu_power = power;
- 		}
- #endif
- 	}
- #ifdef CONFIG_NUMA
- 	for (i = 0; i < MAX_NUMNODES; i++) {
- 		struct sched_group *sg = sched_group_nodes[i];
- 		int j;
- 		if (sg == NULL)
- 			continue;
- next_sg:
- 		for_each_cpu_mask(j, sg->cpumask) {
- 			struct sched_domain *sd;
- 			int power;
- 			sd = &per_cpu(phys_domains, j);
- 			if (j != first_cpu(sd->groups->cpumask)) {
- 				/*
- 				 * Only add "power" once for each
- 				 * physical package.
- 				 */
- 				continue;
- 			}
- 			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
- 				(cpus_weight(sd->groups->cpumask)-1) / 10;
- 			sg->cpu_power += power;
- 		}
- 		sg = sg->next;
- 		if (sg != sched_group_nodes[i])
- 			goto next_sg;
- 	}
- #endif
- 	/* Attach the domains */
- 	for_each_online_cpu(i) {
- 		struct sched_domain *sd;
- #ifdef CONFIG_SCHED_SMT
- 		sd = &per_cpu(cpu_domains, i);
- #else
- 		sd = &per_cpu(phys_domains, i);
- #endif
- 		cpu_attach_domain(sd, i);
- 	}
- }
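The result of build_sched_domains() is a per-cpu hierarchy linked through ->parent: the sibling level (if CONFIG_SCHED_SMT were enabled), then the physical level spanning one node's cpus, then the NUMA node-span level, and on large systems an all-nodes level on top. A hedged debugging sketch (hypothetical helper, not in the original file) that walks that hierarchy for one cpu:

static void dump_domain_levels(int cpu)
{
	struct sched_domain *sd;

#ifdef CONFIG_SCHED_SMT
	sd = &per_cpu(cpu_domains, cpu);
#else
	sd = &per_cpu(phys_domains, cpu);
#endif
	/* walk from the lowest attached level up through its parents */
	for (; sd; sd = sd->parent)
		printk(KERN_DEBUG "cpu %d: domain level spans %d cpus\n",
		       cpu, cpus_weight(sd->span));
}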
- /*
- * Set up scheduler domains and groups. Callers must hold the hotplug lock.
- */
- void arch_init_sched_domains(const cpumask_t *cpu_map)
- {
- 	cpumask_t cpu_default_map;
- 	/*
- 	 * Setup mask for cpus without special case scheduling requirements.
- 	 * For now this just excludes isolated cpus, but could be used to
- 	 * exclude other special cases in the future.
- 	 */
- 	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
- 	build_sched_domains(&cpu_default_map);
- }
- void arch_destroy_sched_domains(const cpumask_t *cpu_map)
- {
- #ifdef CONFIG_NUMA
- 	int i;
- 	for (i = 0; i < MAX_NUMNODES; i++) {
- 		cpumask_t nodemask = node_to_cpumask(i);
- 		struct sched_group *oldsg, *sg = sched_group_nodes[i];
- 		cpus_and(nodemask, nodemask, *cpu_map);
- 		if (cpus_empty(nodemask))
- 			continue;
- 		if (sg == NULL)
- 			continue;
- 		sg = sg->next;
- next_sg:
- 		oldsg = sg;
- 		sg = sg->next;
- 		kfree(oldsg);
- 		if (oldsg != sched_group_nodes[i])
- 			goto next_sg;
- 		sched_group_nodes[i] = NULL;
- 	}
- #endif
- }