|
@@ -90,6 +90,12 @@ cpumask_t cpu_callout_map;
|
|
|
EXPORT_SYMBOL(cpu_callout_map);
|
|
|
static cpumask_t smp_commenced_mask;
|
|
|
|
|
|
+/* TSC's upper 32 bits can't be written in earlier CPUs (before Prescott), so there
|
|
|
+ * is no way to resync one AP against BP. TBD: for prescott and above, we
|
|
|
+ * should use IA64's algorithm
|
|
|
+ */
|
|
|
+static int __devinitdata tsc_sync_disabled; /* set to 1 by smp_prepare_cpu() around a warm boot; smp_callin() skips synchronize_tsc_ap() while it is non-zero */
|
|
|
+
|
|
|
/* Per CPU bogomips and other parameters */
|
|
|
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
|
|
|
EXPORT_SYMBOL(cpu_data);
|
|
@@ -427,7 +433,7 @@ static void __devinit smp_callin(void)
|
|
|
/*
|
|
|
* Synchronize the TSC with the BP
|
|
|
*/
|
|
|
- if (cpu_has_tsc && cpu_khz)
|
|
|
+ if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
|
|
|
synchronize_tsc_ap();
|
|
|
}
|
|
|
|
|
@@ -507,6 +513,7 @@ static void __devinit start_secondary(void *unused)
|
|
|
lock_ipi_call_lock();
|
|
|
cpu_set(smp_processor_id(), cpu_online_map);
|
|
|
unlock_ipi_call_lock();
|
|
|
+ per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
|
|
|
|
|
|
/* We can take interrupts now: we're officially "up". */
|
|
|
local_irq_enable();
|
|
@@ -816,8 +823,43 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
|
|
|
#endif /* WAKE_SECONDARY_VIA_INIT */
|
|
|
|
|
|
extern cpumask_t cpu_initialized;
|
|
|
+static inline int alloc_cpu_id(void) /* returns the first CPU number not set in cpu_present_map, or -ENODEV if there is none below NR_CPUS */
|
|
|
+{
|
|
|
+ cpumask_t tmp_map;
|
|
|
+ int cpu;
|
|
|
+ cpus_complement(tmp_map, cpu_present_map); /* tmp_map = CPUs that are NOT marked present */
|
|
|
+ cpu = first_cpu(tmp_map); /* lowest-numbered candidate */
|
|
|
+ if (cpu >= NR_CPUS) /* no free CPU number was found */
|
|
|
+ return -ENODEV;
|
|
|
+ return cpu;
|
|
|
+}
|
|
|
+
|
|
|
+#ifdef CONFIG_HOTPLUG_CPU /* hotplug build: idle tasks are cached per CPU and reused */
|
|
|
+static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS]; /* idle task previously forked for each CPU; NULL until alloc_idle_task() fills it */
|
|
|
+static inline struct task_struct * alloc_idle_task(int cpu) /* returns the idle task for cpu: the cached one if present, else the result of fork_idle() (may be an ERR_PTR) */
|
|
|
+{
|
|
|
+ struct task_struct *idle;
|
|
|
+
|
|
|
+ if ((idle = cpu_idle_tasks[cpu]) != NULL) {
|
|
|
+ /* Re-initialize the cached idle task's thread_struct; we really want to avoid
|
|
|
+ * destroying and re-creating the idle thread.
|
|
|
+ */
|
|
|
+ idle->thread.esp = (unsigned long)(((struct pt_regs *)
|
|
|
+ (THREAD_SIZE + (unsigned long) idle->thread_info)) - 1); /* esp = one struct pt_regs below (thread_info + THREAD_SIZE) */
|
|
|
+ init_idle(idle, cpu);
|
|
|
+ return idle;
|
|
|
+ }
|
|
|
+ idle = fork_idle(cpu); /* nothing cached for this CPU yet */
|
|
|
+
|
|
|
+ if (!IS_ERR(idle)) /* only cache a successfully forked task */
|
|
|
+ cpu_idle_tasks[cpu] = idle;
|
|
|
+ return idle;
|
|
|
+}
|
|
|
+#else /* !CONFIG_HOTPLUG_CPU */
|
|
|
+#define alloc_idle_task(cpu) fork_idle(cpu) /* no caching: always fork a new idle task */
|
|
|
+#endif /* CONFIG_HOTPLUG_CPU */
|
|
|
|
|
|
-static int __devinit do_boot_cpu(int apicid)
|
|
|
+static int __devinit do_boot_cpu(int apicid, int cpu)
|
|
|
/*
|
|
|
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
|
|
|
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
|
|
@@ -826,16 +868,17 @@ static int __devinit do_boot_cpu(int apicid)
|
|
|
{
|
|
|
struct task_struct *idle;
|
|
|
unsigned long boot_error;
|
|
|
- int timeout, cpu;
|
|
|
+ int timeout;
|
|
|
unsigned long start_eip;
|
|
|
unsigned short nmi_high = 0, nmi_low = 0;
|
|
|
|
|
|
- cpu = ++cpucount;
|
|
|
+ ++cpucount;
|
|
|
+
|
|
|
/*
|
|
|
* We can't use kernel_thread since we must avoid to
|
|
|
* reschedule the child.
|
|
|
*/
|
|
|
- idle = fork_idle(cpu);
|
|
|
+ idle = alloc_idle_task(cpu);
|
|
|
if (IS_ERR(idle))
|
|
|
panic("failed fork for CPU %d", cpu);
|
|
|
idle->thread.eip = (unsigned long) start_secondary;
|
|
@@ -902,13 +945,16 @@ static int __devinit do_boot_cpu(int apicid)
|
|
|
inquire_remote_apic(apicid);
|
|
|
}
|
|
|
}
|
|
|
- x86_cpu_to_apicid[cpu] = apicid;
|
|
|
+
|
|
|
if (boot_error) {
|
|
|
/* Try to put things back the way they were before ... */
|
|
|
unmap_cpu_to_logical_apicid(cpu);
|
|
|
cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
|
|
|
cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
|
|
|
cpucount--;
|
|
|
+ } else {
|
|
|
+ x86_cpu_to_apicid[cpu] = apicid;
|
|
|
+ cpu_set(cpu, cpu_present_map);
|
|
|
}
|
|
|
|
|
|
/* mark "stuck" area as not stuck */
|
|
@@ -917,6 +963,75 @@ static int __devinit do_boot_cpu(int apicid)
|
|
|
return boot_error;
|
|
|
}
|
|
|
|
|
|
+#ifdef CONFIG_HOTPLUG_CPU
|
|
|
+void cpu_exit_clear(void) /* undo the boot-time bookkeeping for the CPU executing this function */
|
|
|
+{
|
|
|
+ int cpu = raw_smp_processor_id(); /* NOTE(review): presumably runs on the CPU that is going offline - confirm against the caller */
|
|
|
+
|
|
|
+ idle_task_exit();
|
|
|
+
|
|
|
+ cpucount --; /* mirrors the ++cpucount in do_boot_cpu() */
|
|
|
+ cpu_uninit();
|
|
|
+ irq_ctx_exit(cpu);
|
|
|
+
|
|
|
+ cpu_clear(cpu, cpu_callout_map); /* drop this CPU from the callout, callin and present maps */
|
|
|
+ cpu_clear(cpu, cpu_callin_map);
|
|
|
+ cpu_clear(cpu, cpu_present_map);
|
|
|
+
|
|
|
+ cpu_clear(cpu, smp_commenced_mask); /* __cpu_up() sets this bit again when the CPU is brought back */
|
|
|
+ unmap_cpu_to_logical_apicid(cpu);
|
|
|
+}
|
|
|
+
|
|
|
+struct warm_boot_cpu_info { /* arguments handed from smp_prepare_cpu() to do_warm_boot_cpu() */
|
|
|
+ struct completion *complete; /* completed by do_warm_boot_cpu() after do_boot_cpu() returns */
|
|
|
+ int apicid; /* first argument passed to do_boot_cpu() */
|
|
|
+ int cpu; /* second argument passed to do_boot_cpu() */
|
|
|
+};
|
|
|
+
|
|
|
+static void __devinit do_warm_boot_cpu(void *p) /* work callback: p points to a struct warm_boot_cpu_info */
|
|
|
+{
|
|
|
+ struct warm_boot_cpu_info *info = p;
|
|
|
+ do_boot_cpu(info->apicid, info->cpu); /* NOTE(review): the return value (boot_error) is discarded, so a failed boot is not reported to the waiter */
|
|
|
+ complete(info->complete); /* wake the waiter in smp_prepare_cpu() */
|
|
|
+}
|
|
|
+
|
|
|
+int __devinit smp_prepare_cpu(int cpu) /* warm-boot cpu through do_boot_cpu(); returns -ENODEV if no APIC id is recorded for it, otherwise 0 */
|
|
|
+{
|
|
|
+ DECLARE_COMPLETION(done);
|
|
|
+ struct warm_boot_cpu_info info;
|
|
|
+ struct work_struct task; /* on-stack work item; this function waits for it to finish before returning */
|
|
|
+ int apicid, ret;
|
|
|
+
|
|
|
+ lock_cpu_hotplug();
|
|
|
+ apicid = x86_cpu_to_apicid[cpu]; /* recorded by do_boot_cpu() on a successful boot */
|
|
|
+ if (apicid == BAD_APICID) {
|
|
|
+ ret = -ENODEV;
|
|
|
+ goto exit;
|
|
|
+ }
|
|
|
+
|
|
|
+ info.complete = &done;
|
|
|
+ info.apicid = apicid;
|
|
|
+ info.cpu = cpu;
|
|
|
+ INIT_WORK(&task, do_warm_boot_cpu, &info);
|
|
|
+
|
|
|
+ tsc_sync_disabled = 1; /* makes smp_callin() skip synchronize_tsc_ap() for this boot */
|
|
|
+
|
|
|
+ /* init low mem mapping */
|
|
|
+ memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
|
|
|
+ sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS); /* copies KERNEL_PGD_PTRS entries from index USER_PGD_PTRS down to index 0 */
|
|
|
+ flush_tlb_all();
|
|
|
+ schedule_work(&task); /* do_boot_cpu() runs from the work callback, not from this context */
|
|
|
+ wait_for_completion(&done);
|
|
|
+
|
|
|
+ tsc_sync_disabled = 0;
|
|
|
+ zap_low_mappings(); /* presumably removes the low mapping installed above - confirm */
|
|
|
+ ret = 0; /* NOTE(review): 0 is returned even if do_boot_cpu() failed; its result is never propagated */
|
|
|
+exit:
|
|
|
+ unlock_cpu_hotplug();
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+#endif
|
|
|
+
|
|
|
static void smp_tune_scheduling (void)
|
|
|
{
|
|
|
unsigned long cachesize; /* kB */
|
|
@@ -1069,7 +1184,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
|
|
|
if (max_cpus <= cpucount+1)
|
|
|
continue;
|
|
|
|
|
|
- if (do_boot_cpu(apicid))
|
|
|
+ if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
|
|
|
printk("CPU #%d not responding - cannot use it.\n",
|
|
|
apicid);
|
|
|
else
|
|
@@ -1149,25 +1264,24 @@ void __devinit smp_prepare_boot_cpu(void)
|
|
|
{
|
|
|
cpu_set(smp_processor_id(), cpu_online_map);
|
|
|
cpu_set(smp_processor_id(), cpu_callout_map);
|
|
|
+ cpu_set(smp_processor_id(), cpu_present_map);
|
|
|
+ per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
|
|
|
}
|
|
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
|
-
|
|
|
-/* must be called with the cpucontrol mutex held */
|
|
|
-static int __devinit cpu_enable(unsigned int cpu)
|
|
|
+static void
|
|
|
+remove_siblinginfo(int cpu) /* drop cpu from the sibling/core maps of its peers and reset its own topology entries */
|
|
|
{
|
|
|
- /* get the target out of its holding state */
|
|
|
- per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
|
|
|
- wmb();
|
|
|
-
|
|
|
- /* wait for the processor to ack it. timeout? */
|
|
|
- while (!cpu_online(cpu))
|
|
|
- cpu_relax();
|
|
|
-
|
|
|
- fixup_irqs(cpu_online_map);
|
|
|
- /* counter the disable in fixup_irqs() */
|
|
|
- local_irq_enable();
|
|
|
- return 0;
|
|
|
+ int sibling;
|
|
|
+
|
|
|
+ for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) /* every CPU listed in cpu's own sibling map */
|
|
|
+ cpu_clear(cpu, cpu_sibling_map[sibling]);
|
|
|
+ for_each_cpu_mask(sibling, cpu_core_map[cpu]) /* same for the core map */
|
|
|
+ cpu_clear(cpu, cpu_core_map[sibling]);
|
|
|
+ cpus_clear(cpu_sibling_map[cpu]); /* finally empty cpu's own maps */
|
|
|
+ cpus_clear(cpu_core_map[cpu]);
|
|
|
+ phys_proc_id[cpu] = BAD_APICID; /* mark both ids as invalid */
|
|
|
+ cpu_core_id[cpu] = BAD_APICID;
|
|
|
}
|
|
|
|
|
|
int __cpu_disable(void)
|
|
@@ -1193,6 +1307,8 @@ int __cpu_disable(void)
|
|
|
mdelay(1);
|
|
|
local_irq_disable();
|
|
|
|
|
|
+ remove_siblinginfo(cpu);
|
|
|
+
|
|
|
cpu_clear(cpu, map);
|
|
|
fixup_irqs(map);
|
|
|
/* It's now safe to remove this processor from the online map */
|
|
@@ -1207,8 +1323,10 @@ void __cpu_die(unsigned int cpu)
|
|
|
|
|
|
for (i = 0; i < 10; i++) {
|
|
|
/* They ack this in play_dead by setting CPU_DEAD */
|
|
|
- if (per_cpu(cpu_state, cpu) == CPU_DEAD)
|
|
|
+ if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
|
|
|
+ printk ("CPU %d is now offline\n", cpu);
|
|
|
return;
|
|
|
+ }
|
|
|
current->state = TASK_UNINTERRUPTIBLE;
|
|
|
schedule_timeout(HZ/10);
|
|
|
}
|
|
@@ -1236,15 +1354,8 @@ int __devinit __cpu_up(unsigned int cpu)
|
|
|
return -EIO;
|
|
|
}
|
|
|
|
|
|
-#ifdef CONFIG_HOTPLUG_CPU
|
|
|
- /* Already up, and in cpu_quiescent now? */
|
|
|
- if (cpu_isset(cpu, smp_commenced_mask)) {
|
|
|
- cpu_enable(cpu);
|
|
|
- return 0;
|
|
|
- }
|
|
|
-#endif
|
|
|
-
|
|
|
local_irq_enable();
|
|
|
+ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
|
|
|
/* Unleash the CPU! */
|
|
|
cpu_set(cpu, smp_commenced_mask);
|
|
|
while (!cpu_isset(cpu, cpu_online_map))
|
|
@@ -1258,10 +1369,12 @@ void __init smp_cpus_done(unsigned int max_cpus)
|
|
|
setup_ioapic_dest();
|
|
|
#endif
|
|
|
zap_low_mappings();
|
|
|
+#ifndef CONFIG_HOTPLUG_CPU
|
|
|
/*
|
|
|
* Disable executability of the SMP trampoline:
|
|
|
*/
|
|
|
set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
void __init smp_intr_init(void)
|