@@ -1,444 +0,0 @@
-/*
- * arch/ia64/kernel/domain.c
- * Architecture specific sched-domains builder.
- *
- * Copyright (C) 2004 Jesse Barnes
- * Copyright (C) 2004 Silicon Graphics, Inc.
- */
-
-#include <linux/sched.h>
-#include <linux/percpu.h>
-#include <linux/slab.h>
-#include <linux/cpumask.h>
-#include <linux/init.h>
-#include <linux/topology.h>
-#include <linux/nodemask.h>
-
-#define SD_NODES_PER_DOMAIN 16
-
-#ifdef CONFIG_NUMA
-/**
- * find_next_best_node - find the next node to include in a sched_domain
- * @node: node whose sched_domain we're building
- * @used_nodes: nodes already in the sched_domain
- *
- * Find the next node to include in a given scheduling domain. Simply
- * finds the closest node not already in the @used_nodes map.
- *
- * Should use nodemask_t.
- */
-static int find_next_best_node(int node, unsigned long *used_nodes)
-{
-	int i, n, val, min_val, best_node = 0;
-
-	min_val = INT_MAX;
-
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		/* Start at @node */
-		n = (node + i) % MAX_NUMNODES;
-
-		if (!nr_cpus_node(n))
-			continue;
-
-		/* Skip already used nodes */
-		if (test_bit(n, used_nodes))
-			continue;
-
-		/* Simple min distance search */
-		val = node_distance(node, n);
-
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
-
-	set_bit(best_node, used_nodes);
-	return best_node;
-}
-
-/**
- * sched_domain_node_span - get a cpumask for a node's sched_domain
- * @node: node whose cpumask we're constructing
- * @size: number of nodes to include in this span
- *
- * Given a node, construct a good cpumask for its sched_domain to span. It
- * should be one that prevents unnecessary balancing, but also spreads tasks
- * out optimally.
- */
-static cpumask_t sched_domain_node_span(int node)
-{
-	int i;
-	cpumask_t span, nodemask;
-	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
-
-	cpus_clear(span);
-	bitmap_zero(used_nodes, MAX_NUMNODES);
-
-	nodemask = node_to_cpumask(node);
-	cpus_or(span, span, nodemask);
-	set_bit(node, used_nodes);
-
-	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
-		int next_node = find_next_best_node(node, used_nodes);
-		nodemask = node_to_cpumask(next_node);
-		cpus_or(span, span, nodemask);
-	}
-
-	return span;
-}
-#endif
-
-/*
- * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
- * can switch it on easily if needed.
- */
-#ifdef CONFIG_SCHED_SMT
-static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
-static struct sched_group sched_group_cpus[NR_CPUS];
-static int cpu_to_cpu_group(int cpu)
-{
-	return cpu;
-}
-#endif
-
-static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static struct sched_group sched_group_phys[NR_CPUS];
-static int cpu_to_phys_group(int cpu)
-{
-#ifdef CONFIG_SCHED_SMT
-	return first_cpu(cpu_sibling_map[cpu]);
-#else
-	return cpu;
-#endif
-}
-
-#ifdef CONFIG_NUMA
-/*
- * The init_sched_build_groups can't handle what we want to do with node
- * groups, so roll our own. Now each node has its own list of groups which
- * gets dynamically allocated.
- */
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
-static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];
-
-static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
-static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];
-
-static int cpu_to_allnodes_group(int cpu)
-{
-	return cpu_to_node(cpu);
-}
-#endif
-
-/*
- * Build sched domains for a given set of cpus and attach the sched domains
- * to the individual cpus
- */
-void build_sched_domains(const cpumask_t *cpu_map)
-{
-	int i;
-#ifdef CONFIG_NUMA
-	struct sched_group **sched_group_nodes = NULL;
-	struct sched_group *sched_group_allnodes = NULL;
-
-	/*
-	 * Allocate the per-node list of sched groups
-	 */
-	sched_group_nodes = kmalloc(sizeof(struct sched_group*)*MAX_NUMNODES,
-				    GFP_ATOMIC);
-	if (!sched_group_nodes) {
-		printk(KERN_WARNING "Can not alloc sched group node list\n");
-		return;
-	}
-	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
-#endif
-
-	/*
-	 * Set up domains for cpus specified by the cpu_map.
-	 */
-	for_each_cpu_mask(i, *cpu_map) {
-		int group;
-		struct sched_domain *sd = NULL, *p;
-		cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-
-#ifdef CONFIG_NUMA
-		if (cpus_weight(*cpu_map)
-				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
-			if (!sched_group_allnodes) {
-				sched_group_allnodes
-					= kmalloc(sizeof(struct sched_group)
-							* MAX_NUMNODES,
-						  GFP_KERNEL);
-				if (!sched_group_allnodes) {
-					printk(KERN_WARNING
-					"Can not alloc allnodes sched group\n");
-					break;
-				}
-				sched_group_allnodes_bycpu[i]
-						= sched_group_allnodes;
-			}
-			sd = &per_cpu(allnodes_domains, i);
-			*sd = SD_ALLNODES_INIT;
-			sd->span = *cpu_map;
-			group = cpu_to_allnodes_group(i);
-			sd->groups = &sched_group_allnodes[group];
-			p = sd;
-		} else
-			p = NULL;
-
-		sd = &per_cpu(node_domains, i);
-		*sd = SD_NODE_INIT;
-		sd->span = sched_domain_node_span(cpu_to_node(i));
-		sd->parent = p;
-		cpus_and(sd->span, sd->span, *cpu_map);
-#endif
-
-		p = sd;
-		sd = &per_cpu(phys_domains, i);
-		group = cpu_to_phys_group(i);
-		*sd = SD_CPU_INIT;
-		sd->span = nodemask;
-		sd->parent = p;
-		sd->groups = &sched_group_phys[group];
-
-#ifdef CONFIG_SCHED_SMT
-		p = sd;
-		sd = &per_cpu(cpu_domains, i);
-		group = cpu_to_cpu_group(i);
-		*sd = SD_SIBLING_INIT;
-		sd->span = cpu_sibling_map[i];
-		cpus_and(sd->span, sd->span, *cpu_map);
-		sd->parent = p;
-		sd->groups = &sched_group_cpus[group];
-#endif
-	}
-
-#ifdef CONFIG_SCHED_SMT
-	/* Set up CPU (sibling) groups */
-	for_each_cpu_mask(i, *cpu_map) {
-		cpumask_t this_sibling_map = cpu_sibling_map[i];
-		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
-		if (i != first_cpu(this_sibling_map))
-			continue;
-
-		init_sched_build_groups(sched_group_cpus, this_sibling_map,
-						&cpu_to_cpu_group);
-	}
-#endif
-
-	/* Set up physical groups */
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		cpumask_t nodemask = node_to_cpumask(i);
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-		if (cpus_empty(nodemask))
-			continue;
-
-		init_sched_build_groups(sched_group_phys, nodemask,
-						&cpu_to_phys_group);
-	}
-
-#ifdef CONFIG_NUMA
-	if (sched_group_allnodes)
-		init_sched_build_groups(sched_group_allnodes, *cpu_map,
-					&cpu_to_allnodes_group);
-
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		/* Set up node groups */
-		struct sched_group *sg, *prev;
-		cpumask_t nodemask = node_to_cpumask(i);
-		cpumask_t domainspan;
-		cpumask_t covered = CPU_MASK_NONE;
-		int j;
-
-		cpus_and(nodemask, nodemask, *cpu_map);
-		if (cpus_empty(nodemask)) {
-			sched_group_nodes[i] = NULL;
-			continue;
-		}
-
-		domainspan = sched_domain_node_span(i);
-		cpus_and(domainspan, domainspan, *cpu_map);
-
-		sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
-		sched_group_nodes[i] = sg;
-		for_each_cpu_mask(j, nodemask) {
-			struct sched_domain *sd;
-			sd = &per_cpu(node_domains, j);
-			sd->groups = sg;
-			if (sd->groups == NULL) {
-				/* Turn off balancing if we have no groups */
-				sd->flags = 0;
-			}
-		}
-		if (!sg) {
-			printk(KERN_WARNING
-			"Can not alloc domain group for node %d\n", i);
-			continue;
-		}
-		sg->cpu_power = 0;
-		sg->cpumask = nodemask;
-		cpus_or(covered, covered, nodemask);
-		prev = sg;
-
-		for (j = 0; j < MAX_NUMNODES; j++) {
-			cpumask_t tmp, notcovered;
-			int n = (i + j) % MAX_NUMNODES;
-
-			cpus_complement(notcovered, covered);
-			cpus_and(tmp, notcovered, *cpu_map);
-			cpus_and(tmp, tmp, domainspan);
-			if (cpus_empty(tmp))
-				break;
-
-			nodemask = node_to_cpumask(n);
-			cpus_and(tmp, tmp, nodemask);
-			if (cpus_empty(tmp))
-				continue;
-
-			sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
-			if (!sg) {
-				printk(KERN_WARNING
-				"Can not alloc domain group for node %d\n", j);
-				break;
-			}
-			sg->cpu_power = 0;
-			sg->cpumask = tmp;
-			cpus_or(covered, covered, tmp);
-			prev->next = sg;
-			prev = sg;
-		}
-		prev->next = sched_group_nodes[i];
-	}
-#endif
-
-	/* Calculate CPU power for physical packages and nodes */
-	for_each_cpu_mask(i, *cpu_map) {
-		int power;
-		struct sched_domain *sd;
-#ifdef CONFIG_SCHED_SMT
-		sd = &per_cpu(cpu_domains, i);
-		power = SCHED_LOAD_SCALE;
-		sd->groups->cpu_power = power;
-#endif
-
-		sd = &per_cpu(phys_domains, i);
-		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-		sd->groups->cpu_power = power;
-
-#ifdef CONFIG_NUMA
-		sd = &per_cpu(allnodes_domains, i);
-		if (sd->groups) {
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-			sd->groups->cpu_power = power;
-		}
-#endif
-	}
-
-#ifdef CONFIG_NUMA
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		struct sched_group *sg = sched_group_nodes[i];
-		int j;
-
-		if (sg == NULL)
-			continue;
-next_sg:
-		for_each_cpu_mask(j, sg->cpumask) {
-			struct sched_domain *sd;
-			int power;
-
-			sd = &per_cpu(phys_domains, j);
-			if (j != first_cpu(sd->groups->cpumask)) {
-				/*
-				 * Only add "power" once for each
-				 * physical package.
-				 */
-				continue;
-			}
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-
-			sg->cpu_power += power;
-		}
-		sg = sg->next;
-		if (sg != sched_group_nodes[i])
-			goto next_sg;
-	}
-#endif
-
-	/* Attach the domains */
-	for_each_cpu_mask(i, *cpu_map) {
-		struct sched_domain *sd;
-#ifdef CONFIG_SCHED_SMT
-		sd = &per_cpu(cpu_domains, i);
-#else
-		sd = &per_cpu(phys_domains, i);
-#endif
-		cpu_attach_domain(sd, i);
-	}
-}
-/*
- * Set up scheduler domains and groups. Callers must hold the hotplug lock.
- */
-void arch_init_sched_domains(const cpumask_t *cpu_map)
-{
-	cpumask_t cpu_default_map;
-
-	/*
-	 * Setup mask for cpus without special case scheduling requirements.
-	 * For now this just excludes isolated cpus, but could be used to
-	 * exclude other special cases in the future.
-	 */
-	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
-
-	build_sched_domains(&cpu_default_map);
-}
-
-void arch_destroy_sched_domains(const cpumask_t *cpu_map)
-{
-#ifdef CONFIG_NUMA
-	int i;
-	int cpu;
-
-	for_each_cpu_mask(cpu, *cpu_map) {
-		struct sched_group *sched_group_allnodes
-			= sched_group_allnodes_bycpu[cpu];
-		struct sched_group **sched_group_nodes
-			= sched_group_nodes_bycpu[cpu];
-
-		if (sched_group_allnodes) {
-			kfree(sched_group_allnodes);
-			sched_group_allnodes_bycpu[cpu] = NULL;
-		}
-
-		if (!sched_group_nodes)
-			continue;
-
-		for (i = 0; i < MAX_NUMNODES; i++) {
-			cpumask_t nodemask = node_to_cpumask(i);
-			struct sched_group *oldsg, *sg = sched_group_nodes[i];
-
-			cpus_and(nodemask, nodemask, *cpu_map);
-			if (cpus_empty(nodemask))
-				continue;
-
-			if (sg == NULL)
-				continue;
-			sg = sg->next;
-next_sg:
-			oldsg = sg;
-			sg = sg->next;
-			kfree(oldsg);
-			if (oldsg != sched_group_nodes[i])
-				goto next_sg;
-		}
-		kfree(sched_group_nodes);
-		sched_group_nodes_bycpu[cpu] = NULL;
-	}
-#endif
-}