@@ -371,408 +371,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#if defined(CONFIG_IRQBALANCE)
-# include <asm/processor.h>	/* kernel_thread() */
-# include <linux/kernel_stat.h>	/* kstat */
-# include <linux/slab.h>	/* kmalloc() */
-# include <linux/timer.h>
-
-#define IRQBALANCE_CHECK_ARCH -999
-#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
-#define BALANCED_IRQ_MORE_DELTA (HZ/10)
-#define BALANCED_IRQ_LESS_DELTA (HZ)
-
-static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
-static int physical_balance __read_mostly;
-static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
-
-static struct irq_cpu_info {
-	unsigned long *last_irq;
-	unsigned long *irq_delta;
-	unsigned long irq;
-} irq_cpu_data[NR_CPUS];
-
-#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
-#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
-#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
-
-#define IDLE_ENOUGH(cpu,now) \
-	(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
-
-#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
-
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
-
-static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL;
-
-static cpumask_t *balance_irq_affinity;
-
-
-static void __init irq_affinity_init_work(void *data)
-{
-	struct dyn_array *da = data;
-
-	int i;
-	struct balance_irq_affinity *affinity;
-
-	affinity = *da->name;
-
-	for (i = 0; i < *da->nr; i++)
-		memcpy(&affinity[i], &balance_irq_affinity_init,
-			sizeof(struct balance_irq_affinity));
-
-}
-
-DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work);
-
-
-void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	balance_irq_affinity[irq] = mask;
-}
-
-static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
-				unsigned long now, int direction)
-{
-	int search_idle = 1;
-	int cpu = curr_cpu;
-
-	goto inside;
-
-	do {
-		if (unlikely(cpu == curr_cpu))
-			search_idle = 0;
-inside:
-		if (direction == 1) {
-			cpu++;
-			if (cpu >= NR_CPUS)
-				cpu = 0;
-		} else {
-			cpu--;
-			if (cpu == -1)
-				cpu = NR_CPUS-1;
-		}
-	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
-			(search_idle && !IDLE_ENOUGH(cpu, now)));
-
-	return cpu;
-}
-
-static inline void balance_irq(int cpu, int irq)
-{
-	unsigned long now = jiffies;
-	cpumask_t allowed_mask;
-	unsigned int new_cpu;
-
-	if (irqbalance_disabled)
-		return;
-
-	cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
-	new_cpu = move(cpu, allowed_mask, now, 1);
-	if (cpu != new_cpu)
-		set_pending_irq(irq, cpumask_of_cpu(new_cpu));
-}
-
-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
-{
-	int i, j;
-	struct irq_desc *desc;
-
-	for_each_online_cpu(i) {
-		for (j = 0; j < nr_irqs; j++) {
-			desc = irq_to_desc(j);
-			if (!desc->action)
-				continue;
-			/* Is it a significant load ? */
-			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
-						useful_load_threshold)
-				continue;
-			balance_irq(i, j);
-		}
-	}
-	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-	return;
-}
-
-static void do_irq_balance(void)
-{
-	int i, j;
-	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
-	unsigned long move_this_load = 0;
-	int max_loaded = 0, min_loaded = 0;
-	int load;
-	unsigned long useful_load_threshold = balanced_irq_interval + 10;
-	int selected_irq;
-	int tmp_loaded, first_attempt = 1;
-	unsigned long tmp_cpu_irq;
-	unsigned long imbalance = 0;
-	cpumask_t allowed_mask, target_cpu_mask, tmp;
-	struct irq_desc *desc;
-
-	for_each_possible_cpu(i) {
-		int package_index;
-		CPU_IRQ(i) = 0;
-		if (!cpu_online(i))
-			continue;
-		package_index = CPU_TO_PACKAGEINDEX(i);
-		for (j = 0; j < nr_irqs; j++) {
-			unsigned long value_now, delta;
-			/* Is this an active IRQ or balancing disabled ? */
-			desc = irq_to_desc(j);
-			if (!desc->action || irq_balancing_disabled(j))
-				continue;
-			if (package_index == i)
-				IRQ_DELTA(package_index, j) = 0;
-			/* Determine the total count per processor per IRQ */
-			value_now = (unsigned long) kstat_irqs_cpu(j, i);
-
-			/* Determine the activity per processor per IRQ */
-			delta = value_now - LAST_CPU_IRQ(i, j);
-
-			/* Update last_cpu_irq[][] for the next time */
-			LAST_CPU_IRQ(i, j) = value_now;
-
-			/* Ignore IRQs whose rate is less than the clock */
-			if (delta < useful_load_threshold)
-				continue;
-			/* update the load for the processor or package total */
-			IRQ_DELTA(package_index, j) += delta;
-
-			/* Keep track of the higher numbered sibling as well */
-			if (i != package_index)
-				CPU_IRQ(i) += delta;
-			/*
-			 * We have sibling A and sibling B in the package
-			 *
-			 * cpu_irq[A] = load for cpu A + load for cpu B
-			 * cpu_irq[B] = load for cpu B
-			 */
-			CPU_IRQ(package_index) += delta;
-		}
-	}
-	/* Find the least loaded processor package */
-	for_each_online_cpu(i) {
-		if (i != CPU_TO_PACKAGEINDEX(i))
-			continue;
-		if (min_cpu_irq > CPU_IRQ(i)) {
-			min_cpu_irq = CPU_IRQ(i);
-			min_loaded = i;
-		}
-	}
-	max_cpu_irq = ULONG_MAX;
-
-tryanothercpu:
-	/*
-	 * Look for heaviest loaded processor.
-	 * We may come back to get the next heaviest loaded processor.
-	 * Skip processors with trivial loads.
-	 */
-	tmp_cpu_irq = 0;
-	tmp_loaded = -1;
-	for_each_online_cpu(i) {
-		if (i != CPU_TO_PACKAGEINDEX(i))
-			continue;
-		if (max_cpu_irq <= CPU_IRQ(i))
-			continue;
-		if (tmp_cpu_irq < CPU_IRQ(i)) {
-			tmp_cpu_irq = CPU_IRQ(i);
-			tmp_loaded = i;
-		}
-	}
-
-	if (tmp_loaded == -1) {
-		/*
-		 * In the case of small number of heavy interrupt sources,
-		 * loading some of the cpus too much. We use Ingo's original
-		 * approach to rotate them around.
-		 */
-		if (!first_attempt && imbalance >= useful_load_threshold) {
-			rotate_irqs_among_cpus(useful_load_threshold);
-			return;
-		}
-		goto not_worth_the_effort;
-	}
-
-	first_attempt = 0;		/* heaviest search */
-	max_cpu_irq = tmp_cpu_irq;	/* load */
-	max_loaded = tmp_loaded;	/* processor */
-	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
-
-	/*
-	 * if imbalance is less than approx 10% of max load, then
-	 * observe diminishing returns action. - quit
-	 */
-	if (imbalance < (max_cpu_irq >> 3))
-		goto not_worth_the_effort;
-
-tryanotherirq:
-	/* if we select an IRQ to move that can't go where we want, then
-	 * see if there is another one to try.
-	 */
-	move_this_load = 0;
-	selected_irq = -1;
-	for (j = 0; j < nr_irqs; j++) {
-		/* Is this an active IRQ? */
-		desc = irq_to_desc(j);
-		if (!desc->action)
-			continue;
-		if (imbalance <= IRQ_DELTA(max_loaded, j))
-			continue;
-		/* Try to find the IRQ that is closest to the imbalance
-		 * without going over.
-		 */
-		if (move_this_load < IRQ_DELTA(max_loaded, j)) {
-			move_this_load = IRQ_DELTA(max_loaded, j);
-			selected_irq = j;
-		}
-	}
-	if (selected_irq == -1)
-		goto tryanothercpu;
-
-	imbalance = move_this_load;
-
-	/* For physical_balance case, we accumulated both load
-	 * values in the one of the siblings cpu_irq[],
-	 * to use the same code for physical and logical processors
-	 * as much as possible.
-	 *
-	 * NOTE: the cpu_irq[] array holds the sum of the load for
-	 * sibling A and sibling B in the slot for the lowest numbered
-	 * sibling (A), _AND_ the load for sibling B in the slot for
-	 * the higher numbered sibling.
-	 *
-	 * We seek the least loaded sibling by making the comparison
-	 * (A+B)/2 vs B
-	 */
-	load = CPU_IRQ(min_loaded) >> 1;
-	for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
-		if (load > CPU_IRQ(j)) {
-			/* This won't change cpu_sibling_map[min_loaded] */
-			load = CPU_IRQ(j);
-			min_loaded = j;
-		}
-	}
-
-	cpus_and(allowed_mask,
-		cpu_online_map,
-		balance_irq_affinity[selected_irq]);
-	target_cpu_mask = cpumask_of_cpu(min_loaded);
-	cpus_and(tmp, target_cpu_mask, allowed_mask);
-
-	if (!cpus_empty(tmp)) {
-		/* mark for change destination */
-		set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
-
-		/* Since we made a change, come back sooner to
-		 * check for more variation.
-		 */
-		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-		return;
-	}
-	goto tryanotherirq;
-
-not_worth_the_effort:
-	/*
-	 * if we did not find an IRQ to move, then adjust the time interval
-	 * upward
-	 */
-	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
-		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
-	return;
-}
-
-static int balanced_irq(void *unused)
-{
-	int i;
-	unsigned long prev_balance_time = jiffies;
-	long time_remaining = balanced_irq_interval;
-	struct irq_desc *desc;
-
-	/* push everything to CPU 0 to give us a starting point. */
-	for (i = 0 ; i < nr_irqs ; i++) {
-		desc = irq_to_desc(i);
-		desc->pending_mask = cpumask_of_cpu(0);
-		set_pending_irq(i, cpumask_of_cpu(0));
-	}
-
-	set_freezable();
-	for ( ; ; ) {
-		time_remaining = schedule_timeout_interruptible(time_remaining);
-		try_to_freeze();
-		if (time_after(jiffies,
-				prev_balance_time+balanced_irq_interval)) {
-			preempt_disable();
-			do_irq_balance();
-			prev_balance_time = jiffies;
-			time_remaining = balanced_irq_interval;
-			preempt_enable();
-		}
-	}
-	return 0;
-}
-
-static int __init balanced_irq_init(void)
-{
-	int i;
-	struct cpuinfo_x86 *c;
-	cpumask_t tmp;
-
-	cpus_shift_right(tmp, cpu_online_map, 2);
-	c = &boot_cpu_data;
-	/* When not overwritten by the command line ask subarchitecture. */
-	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
-		irqbalance_disabled = NO_BALANCE_IRQ;
-	if (irqbalance_disabled)
-		return 0;
-
-	/* disable irqbalance completely if there is only one processor online */
-	if (num_online_cpus() < 2) {
-		irqbalance_disabled = 1;
-		return 0;
-	}
-	/*
-	 * Enable physical balance only if more than 1 physical processor
-	 * is present
-	 */
-	if (smp_num_siblings > 1 && !cpus_empty(tmp))
-		physical_balance = 1;
-
-	for_each_online_cpu(i) {
-		irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
-		irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
-		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
-			printk(KERN_ERR "balanced_irq_init: out of memory");
-			goto failed;
-		}
-	}
-
-	printk(KERN_INFO "Starting balanced_irq\n");
-	if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
-		return 0;
-	printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
-failed:
-	for_each_possible_cpu(i) {
-		kfree(irq_cpu_data[i].irq_delta);
-		irq_cpu_data[i].irq_delta = NULL;
-		kfree(irq_cpu_data[i].last_irq);
-		irq_cpu_data[i].last_irq = NULL;
-	}
-	return 0;
-}
-
-int __devinit irqbalance_disable(char *str)
-{
-	irqbalance_disabled = 1;
-	return 1;
-}
-
-__setup("noirqbalance", irqbalance_disable);
-
-late_initcall(balanced_irq_init);
-#endif /* CONFIG_IRQBALANCE */
 #endif /* CONFIG_SMP */
 
 #ifndef CONFIG_SMP