@@ -459,30 +459,35 @@ again:
 	}
 }
 
-static inline void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
+	u64 *mondo, data0, data1, data2;
+	u16 *cpu_list;
 	u64 pstate;
 	int i;
 
 	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
-	for_each_cpu_mask_nr(i, *mask)
-		spitfire_xcall_helper(data0, data1, data2, pstate, i);
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
+	data0 = mondo[0];
+	data1 = mondo[1];
+	data2 = mondo[2];
+	for (i = 0; i < cnt; i++)
+		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
 }
 
 /* Cheetah now allows to send the whole 64-bytes of data in the interrupt
  * packet, but we have no use for that. However we do take advantage of
  * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
  */
-static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask_p)
+static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	u64 pstate, ver, busy_mask;
 	int nack_busy_id, is_jbus, need_more;
-	cpumask_t mask;
-
-	if (cpus_empty(*mask_p))
-		return;
+	u64 *mondo, pstate, ver, busy_mask;
+	u16 *cpu_list;
 
-	mask = *mask_p;
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
 
 	/* Unfortunately, someone at Sun had the brilliant idea to make the
 	 * busy/nack fields hard-coded by ITID number for this Ultra-III
@@ -505,7 +510,7 @@ retry:
 			     "stxa	%2, [%5] %6\n\t"
 			     "membar	#Sync\n\t"
 			     : /* no outputs */
-			     : "r" (data0), "r" (data1), "r" (data2),
+			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
 			       "r" (0x40), "r" (0x50), "r" (0x60),
 			       "i" (ASI_INTR_W));
 
@@ -514,11 +519,16 @@ retry:
 	{
 		int i;
 
-		for_each_cpu_mask_nr(i, mask) {
-			u64 target = (i << 14) | 0x70;
+		for (i = 0; i < cnt; i++) {
+			u64 target, nr;
+
+			nr = cpu_list[i];
+			if (nr == 0xffff)
+				continue;
 
+			target = (nr << 14) | 0x70;
 			if (is_jbus) {
-				busy_mask |= (0x1UL << (i * 2));
+				busy_mask |= (0x1UL << (nr * 2));
 			} else {
 				target |= (nack_busy_id << 24);
 				busy_mask |= (0x1UL <<
@@ -552,11 +562,13 @@ retry:
 		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 				     : : "r" (pstate));
 		if (unlikely(need_more)) {
-			int i, cnt = 0;
-			for_each_cpu_mask_nr(i, mask) {
-				cpu_clear(i, mask);
-				cnt++;
-				if (cnt == 32)
+			int i, this_cnt = 0;
+			for (i = 0; i < cnt; i++) {
+				if (cpu_list[i] == 0xffff)
+					continue;
+				cpu_list[i] = 0xffff;
+				this_cnt++;
+				if (this_cnt == 32)
 					break;
 			}
 			goto retry;
@@ -587,16 +599,20 @@ retry:
 			/* Clear out the mask bits for cpus which did not
 			 * NACK us.
 			 */
-			for_each_cpu_mask_nr(i, mask) {
-				u64 check_mask;
+			for (i = 0; i < cnt; i++) {
+				u64 check_mask, nr;
+
+				nr = cpu_list[i];
+				if (nr == 0xffff)
+					continue;
 
 				if (is_jbus)
-					check_mask = (0x2UL << (2*i));
+					check_mask = (0x2UL << (2*nr));
 				else
 					check_mask = (0x2UL <<
 						      this_busy_nack);
 				if ((dispatch_stat & check_mask) == 0)
-					cpu_clear(i, mask);
+					cpu_list[i] = 0xffff;
 				this_busy_nack += 2;
 				if (this_busy_nack == 64)
 					break;
@@ -608,34 +624,17 @@ retry:
 }
 
 /* Multi-cpu list version. */
-static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 {
-	int cnt, retries, this_cpu, prev_sent, i;
+	int retries, this_cpu, prev_sent, i;
 	unsigned long status;
 	cpumask_t error_mask;
-	struct trap_per_cpu *tb;
 	u16 *cpu_list;
-	u64 *mondo;
-
-	if (cpus_empty(*mask))
-		return;
 
 	this_cpu = smp_processor_id();
-	tb = &trap_block[this_cpu];
-
-	mondo = __va(tb->cpu_mondo_block_pa);
-	mondo[0] = data0;
-	mondo[1] = data1;
-	mondo[2] = data2;
-	wmb();
 
 	cpu_list = __va(tb->cpu_list_pa);
 
-	/* Setup the initial cpu list. */
-	cnt = 0;
-	for_each_cpu_mask_nr(i, *mask)
-		cpu_list[cnt++] = i;
-
 	cpus_clear(error_mask);
 	retries = 0;
 	prev_sent = 0;
@@ -743,11 +742,15 @@ dump_cpu_list_and_out:
 	printk("]\n");
 }
 
-static void (*xcall_deliver_impl)(u64, u64, u64, const cpumask_t *);
+static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
 
 static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
 {
+	struct trap_per_cpu *tb;
+	int this_cpu, i, cnt;
 	unsigned long flags;
+	u16 *cpu_list;
+	u64 *mondo;
 
 	/* We have to do this whole thing with interrupts fully disabled.
 	 * Otherwise if we send an xcall from interrupt context it will
@@ -760,7 +763,29 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask
 	 * Fortunately, udelay() uses %stick/%tick so we can use that.
 	 */
 	local_irq_save(flags);
-	xcall_deliver_impl(data0, data1, data2, mask);
+
+	this_cpu = smp_processor_id();
+	tb = &trap_block[this_cpu];
+
+	mondo = __va(tb->cpu_mondo_block_pa);
+	mondo[0] = data0;
+	mondo[1] = data1;
+	mondo[2] = data2;
+	wmb();
+
+	cpu_list = __va(tb->cpu_list_pa);
+
+	/* Setup the initial cpu list. */
+	cnt = 0;
+	for_each_cpu_mask_nr(i, *mask) {
+		if (i == this_cpu || !cpu_online(i))
+			continue;
+		cpu_list[cnt++] = i;
+	}
+
+	if (cnt)
+		xcall_deliver_impl(tb, cnt);
+
 	local_irq_restore(flags);
 }
 
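
The shape shared by all three backends after this change: the caller fills the
per-cpu mondo words and cpu list once, hands (tb, cnt) to the backend, and the
backend walks the list, treating 0xffff as a "no longer pending" marker across
retries. The stand-alone sketch below models only that hand-off; trap_per_cpu
is mocked as an ordinary struct, deliver_one() is a made-up stand-in for the
real ASI/hypervisor dispatch, and CPU_DONE is a local name for the 0xffff
sentinel. It is illustrative user-space C, not kernel API.

/* Illustrative model of the (tb, cnt) hand-off; everything here is mocked. */
#include <stdint.h>
#include <stdio.h>

#define CPU_DONE 0xffffu		/* entry no longer pending */

struct trap_per_cpu {			/* mock: real struct stores physical addresses */
	uint64_t mondo[3];		/* data0/data1/data2, written by the caller */
	uint16_t cpu_list[64];		/* target cpu ids, built by the caller */
};

/* Fake delivery: even cpu ids succeed at once, odd ones "NACK" the first try. */
static int deliver_one(const uint64_t *mondo, unsigned int cpu, int attempt)
{
	(void)mondo;
	return (cpu % 2 == 0) || attempt > 0;
}

/* Backend in the new style: consume a prebuilt list, mark finished entries. */
static void xcall_backend(struct trap_per_cpu *tb, int cnt)
{
	int i, attempt = 0, pending = cnt;

	while (pending) {
		for (i = 0; i < cnt; i++) {
			uint16_t nr = tb->cpu_list[i];

			if (nr == CPU_DONE)
				continue;	/* already delivered */
			if (deliver_one(tb->mondo, nr, attempt)) {
				tb->cpu_list[i] = CPU_DONE;
				pending--;
			}
		}
		attempt++;
	}
}

/* Caller in the new style: build mondo words and cpu list, then hand off. */
int main(void)
{
	struct trap_per_cpu tb = { .mondo = { 0x10, 0x20, 0x30 } };
	int cnt = 0, cpu;

	for (cpu = 1; cpu <= 6; cpu++)	/* stand-in for the cpumask walk */
		tb.cpu_list[cnt++] = (uint16_t)cpu;

	if (cnt)
		xcall_backend(&tb, cnt);

	printf("delivered to %d cpus\n", cnt);
	return 0;
}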