Quellcode durchsuchen

Blackfin: dpmc: optimize hibernate/resume path

The current save logic used in hibernation is to do a MMR load (base +
offset) into a register, and then push that onto the stack.  Then when
restoring, pop off the stack into a register followed by a MMR store
(base + offset).  These use plenty of 32bit insns rather than 16bit,
are pretty long winded, and full of pipeline bubbles.

So, by taking advantage of MMRs that are contiguous, the multi-register
push/pop insn, and register abuse, we can shrink this code considerably.

When saving, the new logic does a lot of loads into the data and pointer
registers before executing a single multi-register push insn.  Then when
restoring, we do a single multi-register pop insn followed by a lot of
stores.  Overall, this allows us to cut the insn count by ~30%, the code
size by ~45%, and drastically reduce the register hazards that trigger
bubbles in the pipeline.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Mike Frysinger vor 14 Jahren
Ursprung
Commit
eed7b83658
2 geänderte Dateien mit 572 neuen und 439 gelöschten Zeilen
  1. 0 1
      arch/blackfin/include/asm/dpmc.h
  2. 572 438
      arch/blackfin/mach-common/dpmc_modes.S

+ 0 - 1
arch/blackfin/include/asm/dpmc.h

@@ -117,7 +117,6 @@
 #ifndef __ASSEMBLY__
 
 void sleep_mode(u32 sic_iwr0, u32 sic_iwr1, u32 sic_iwr2);
-void hibernate_mode(u32 sic_iwr0, u32 sic_iwr1, u32 sic_iwr2);
 void sleep_deeper(u32 sic_iwr0, u32 sic_iwr1, u32 sic_iwr2);
 void do_hibernate(int wakeup);
 void set_dram_srfs(void);

+ 572 - 438
arch/blackfin/mach-common/dpmc_modes.S

@@ -50,11 +50,21 @@ ENTRY(_sleep_mode)
 	RTS;
 ENDPROC(_sleep_mode)
 
+/*
+ * This func never returns as it puts the part into hibernate, and
+ * is only called from do_hibernate, so we don't bother saving or
+ * restoring any of the normal C runtime state.  When we wake up,
+ * the entry point will be in do_hibernate and not here.
+ *
+ * We accept just one argument -- the value to write to VR_CTL.
+ */
 ENTRY(_hibernate_mode)
-	[--SP] = ( R7:0, P5:0 );
-	[--SP] =  RETS;
+	/* Save/setup the regs we need early for minor pipeline optimization */
+	R4 = R0;
+	P3.H = hi(VR_CTL);
+	P3.L = lo(VR_CTL);
 
-	R3 = R0;
+	/* Disable all wakeup sources */
 	R0 = IWR_DISABLE_ALL;
 	R1 = IWR_DISABLE_ALL;
 	R2 = IWR_DISABLE_ALL;
@@ -62,10 +72,8 @@ ENTRY(_hibernate_mode)
 	call _set_dram_srfs;
 	SSYNC;
 
-	P0.H = hi(VR_CTL);
-	P0.L = lo(VR_CTL);
-
-	W[P0] = R3.L;
+	/* Finally, we climb into our cave to hibernate */
+	W[P3] = R4.L;
 	CLI R2;
 	IDLE;
 .Lforever:
@@ -268,227 +276,55 @@ ENDPROC(_test_pll_locked)
 
 .section .text
 
-#define PM_PUSH(x) \
-	R0 = [P0 + (x - SRAM_BASE_ADDRESS)];\
-	[--SP] =  R0;\
-
-#define PM_POP(x) \
-	R0 = [SP++];\
-	[P0 + (x - SRAM_BASE_ADDRESS)] = R0;\
-
-#define PM_SYS_PUSH(x) \
-	R0 = [P0 + (x - PLL_CTL)];\
-	[--SP] =  R0;\
-
-#define PM_SYS_POP(x) \
-	R0 = [SP++];\
-	[P0 + (x - PLL_CTL)] = R0;\
-
-#define PM_SYS_PUSH16(x) \
-	R0 = w[P0 + (x - PLL_CTL)];\
-	[--SP] =  R0;\
-
-#define PM_SYS_POP16(x) \
-	R0 = [SP++];\
-	w[P0 + (x - PLL_CTL)] = R0;\
+#define PM_REG0  R7
+#define PM_REG1  R6
+#define PM_REG2  R5
+#define PM_REG3  R4
+#define PM_REG4  R3
+#define PM_REG5  R2
+#define PM_REG6  R1
+#define PM_REG7  R0
+#define PM_REG8  P5
+#define PM_REG9  P4
+#define PM_REG10 P3
+#define PM_REG11 P2
+#define PM_REG12 P1
+#define PM_REG13 P0
+
+#define PM_REGSET0  R7:7
+#define PM_REGSET1  R7:6
+#define PM_REGSET2  R7:5
+#define PM_REGSET3  R7:4
+#define PM_REGSET4  R7:3
+#define PM_REGSET5  R7:2
+#define PM_REGSET6  R7:1
+#define PM_REGSET7  R7:0
+#define PM_REGSET8  R7:0, P5:5
+#define PM_REGSET9  R7:0, P5:4
+#define PM_REGSET10 R7:0, P5:3
+#define PM_REGSET11 R7:0, P5:2
+#define PM_REGSET12 R7:0, P5:1
+#define PM_REGSET13 R7:0, P5:0
+
+#define _PM_PUSH(n, x, w, base) PM_REG##n = w[FP + ((x) - (base))];
+#define _PM_POP(n, x, w, base)  w[FP + ((x) - (base))] = PM_REG##n;
+#define PM_PUSH_SYNC(n)         [--sp] = (PM_REGSET##n);
+#define PM_POP_SYNC(n)          (PM_REGSET##n) = [sp++];
+#define PM_PUSH(n, x)           PM_REG##n = [FP++];
+#define PM_POP(n, x)            [FP--] = PM_REG##n;
+#define PM_CORE_PUSH(n, x)      _PM_PUSH(n, x, , COREMMR_BASE)
+#define PM_CORE_POP(n, x)       _PM_POP(n, x, , COREMMR_BASE)
+#define PM_SYS_PUSH(n, x)       _PM_PUSH(n, x, , SYSMMR_BASE)
+#define PM_SYS_POP(n, x)        _PM_POP(n, x, , SYSMMR_BASE)
+#define PM_SYS_PUSH16(n, x)     _PM_PUSH(n, x, w, SYSMMR_BASE)
+#define PM_SYS_POP16(n, x)      _PM_POP(n, x, w, SYSMMR_BASE)
 
 ENTRY(_do_hibernate)
-	[--SP] = ( R7:0, P5:0 );
-	[--SP] =  RETS;
-	/* Save System MMRs */
-	R2 = R0;
-	P0.H = hi(PLL_CTL);
-	P0.L = lo(PLL_CTL);
-
-#ifdef SIC_IMASK0
-	PM_SYS_PUSH(SIC_IMASK0)
-#endif
-#ifdef SIC_IMASK1
-	PM_SYS_PUSH(SIC_IMASK1)
-#endif
-#ifdef SIC_IMASK2
-	PM_SYS_PUSH(SIC_IMASK2)
-#endif
-#ifdef SIC_IMASK
-	PM_SYS_PUSH(SIC_IMASK)
-#endif
-#ifdef SIC_IAR0
-	PM_SYS_PUSH(SIC_IAR0)
-	PM_SYS_PUSH(SIC_IAR1)
-	PM_SYS_PUSH(SIC_IAR2)
-#endif
-#ifdef SIC_IAR3
-	PM_SYS_PUSH(SIC_IAR3)
-#endif
-#ifdef SIC_IAR4
-	PM_SYS_PUSH(SIC_IAR4)
-	PM_SYS_PUSH(SIC_IAR5)
-	PM_SYS_PUSH(SIC_IAR6)
-#endif
-#ifdef SIC_IAR7
-	PM_SYS_PUSH(SIC_IAR7)
-#endif
-#ifdef SIC_IAR8
-	PM_SYS_PUSH(SIC_IAR8)
-	PM_SYS_PUSH(SIC_IAR9)
-	PM_SYS_PUSH(SIC_IAR10)
-	PM_SYS_PUSH(SIC_IAR11)
-#endif
-
-#ifdef SIC_IWR
-	PM_SYS_PUSH(SIC_IWR)
-#endif
-#ifdef SIC_IWR0
-	PM_SYS_PUSH(SIC_IWR0)
-#endif
-#ifdef SIC_IWR1
-	PM_SYS_PUSH(SIC_IWR1)
-#endif
-#ifdef SIC_IWR2
-	PM_SYS_PUSH(SIC_IWR2)
-#endif
-
-#ifdef PINT0_ASSIGN
-	PM_SYS_PUSH(PINT0_MASK_SET)
-	PM_SYS_PUSH(PINT1_MASK_SET)
-	PM_SYS_PUSH(PINT2_MASK_SET)
-	PM_SYS_PUSH(PINT3_MASK_SET)
-	PM_SYS_PUSH(PINT0_ASSIGN)
-	PM_SYS_PUSH(PINT1_ASSIGN)
-	PM_SYS_PUSH(PINT2_ASSIGN)
-	PM_SYS_PUSH(PINT3_ASSIGN)
-	PM_SYS_PUSH(PINT0_INVERT_SET)
-	PM_SYS_PUSH(PINT1_INVERT_SET)
-	PM_SYS_PUSH(PINT2_INVERT_SET)
-	PM_SYS_PUSH(PINT3_INVERT_SET)
-	PM_SYS_PUSH(PINT0_EDGE_SET)
-	PM_SYS_PUSH(PINT1_EDGE_SET)
-	PM_SYS_PUSH(PINT2_EDGE_SET)
-	PM_SYS_PUSH(PINT3_EDGE_SET)
-#endif
-
-	PM_SYS_PUSH(EBIU_AMBCTL0)
-	PM_SYS_PUSH(EBIU_AMBCTL1)
-	PM_SYS_PUSH16(EBIU_AMGCTL)
-
-#ifdef EBIU_FCTL
-	PM_SYS_PUSH(EBIU_MBSCTL)
-	PM_SYS_PUSH(EBIU_MODE)
-	PM_SYS_PUSH(EBIU_FCTL)
-#endif
-
-#ifdef PORTCIO_FER
-	PM_SYS_PUSH16(PORTCIO_DIR)
-	PM_SYS_PUSH16(PORTCIO_INEN)
-	PM_SYS_PUSH16(PORTCIO)
-	PM_SYS_PUSH16(PORTCIO_FER)
-	PM_SYS_PUSH16(PORTDIO_DIR)
-	PM_SYS_PUSH16(PORTDIO_INEN)
-	PM_SYS_PUSH16(PORTDIO)
-	PM_SYS_PUSH16(PORTDIO_FER)
-	PM_SYS_PUSH16(PORTEIO_DIR)
-	PM_SYS_PUSH16(PORTEIO_INEN)
-	PM_SYS_PUSH16(PORTEIO)
-	PM_SYS_PUSH16(PORTEIO_FER)
-#endif
-
-	PM_SYS_PUSH16(SYSCR)
-
-	/* Save Core MMRs */
-	P0.H = hi(SRAM_BASE_ADDRESS);
-	P0.L = lo(SRAM_BASE_ADDRESS);
-
-	PM_PUSH(DMEM_CONTROL)
-	PM_PUSH(DCPLB_ADDR0)
-	PM_PUSH(DCPLB_ADDR1)
-	PM_PUSH(DCPLB_ADDR2)
-	PM_PUSH(DCPLB_ADDR3)
-	PM_PUSH(DCPLB_ADDR4)
-	PM_PUSH(DCPLB_ADDR5)
-	PM_PUSH(DCPLB_ADDR6)
-	PM_PUSH(DCPLB_ADDR7)
-	PM_PUSH(DCPLB_ADDR8)
-	PM_PUSH(DCPLB_ADDR9)
-	PM_PUSH(DCPLB_ADDR10)
-	PM_PUSH(DCPLB_ADDR11)
-	PM_PUSH(DCPLB_ADDR12)
-	PM_PUSH(DCPLB_ADDR13)
-	PM_PUSH(DCPLB_ADDR14)
-	PM_PUSH(DCPLB_ADDR15)
-	PM_PUSH(DCPLB_DATA0)
-	PM_PUSH(DCPLB_DATA1)
-	PM_PUSH(DCPLB_DATA2)
-	PM_PUSH(DCPLB_DATA3)
-	PM_PUSH(DCPLB_DATA4)
-	PM_PUSH(DCPLB_DATA5)
-	PM_PUSH(DCPLB_DATA6)
-	PM_PUSH(DCPLB_DATA7)
-	PM_PUSH(DCPLB_DATA8)
-	PM_PUSH(DCPLB_DATA9)
-	PM_PUSH(DCPLB_DATA10)
-	PM_PUSH(DCPLB_DATA11)
-	PM_PUSH(DCPLB_DATA12)
-	PM_PUSH(DCPLB_DATA13)
-	PM_PUSH(DCPLB_DATA14)
-	PM_PUSH(DCPLB_DATA15)
-	PM_PUSH(IMEM_CONTROL)
-	PM_PUSH(ICPLB_ADDR0)
-	PM_PUSH(ICPLB_ADDR1)
-	PM_PUSH(ICPLB_ADDR2)
-	PM_PUSH(ICPLB_ADDR3)
-	PM_PUSH(ICPLB_ADDR4)
-	PM_PUSH(ICPLB_ADDR5)
-	PM_PUSH(ICPLB_ADDR6)
-	PM_PUSH(ICPLB_ADDR7)
-	PM_PUSH(ICPLB_ADDR8)
-	PM_PUSH(ICPLB_ADDR9)
-	PM_PUSH(ICPLB_ADDR10)
-	PM_PUSH(ICPLB_ADDR11)
-	PM_PUSH(ICPLB_ADDR12)
-	PM_PUSH(ICPLB_ADDR13)
-	PM_PUSH(ICPLB_ADDR14)
-	PM_PUSH(ICPLB_ADDR15)
-	PM_PUSH(ICPLB_DATA0)
-	PM_PUSH(ICPLB_DATA1)
-	PM_PUSH(ICPLB_DATA2)
-	PM_PUSH(ICPLB_DATA3)
-	PM_PUSH(ICPLB_DATA4)
-	PM_PUSH(ICPLB_DATA5)
-	PM_PUSH(ICPLB_DATA6)
-	PM_PUSH(ICPLB_DATA7)
-	PM_PUSH(ICPLB_DATA8)
-	PM_PUSH(ICPLB_DATA9)
-	PM_PUSH(ICPLB_DATA10)
-	PM_PUSH(ICPLB_DATA11)
-	PM_PUSH(ICPLB_DATA12)
-	PM_PUSH(ICPLB_DATA13)
-	PM_PUSH(ICPLB_DATA14)
-	PM_PUSH(ICPLB_DATA15)
-	PM_PUSH(EVT2)
-	PM_PUSH(EVT3)
-	PM_PUSH(EVT5)
-	PM_PUSH(EVT6)
-	PM_PUSH(EVT7)
-	PM_PUSH(EVT8)
-	PM_PUSH(EVT9)
-	PM_PUSH(EVT10)
-	PM_PUSH(EVT11)
-	PM_PUSH(EVT12)
-	PM_PUSH(EVT13)
-	PM_PUSH(EVT14)
-	PM_PUSH(EVT15)
-	PM_PUSH(IMASK)
-	PM_PUSH(ILAT)
-	PM_PUSH(IPRIO)
-	PM_PUSH(TCNTL)
-	PM_PUSH(TPERIOD)
-	PM_PUSH(TSCALE)
-	PM_PUSH(TCOUNT)
-	PM_PUSH(TBUFCTL)
-
-	/* Save Core Registers */
-	[--sp] = SYSCFG;
-	[--sp] = ( R7:0, P5:0 );
+	/*
+	 * Save the core regs early so we can blow them away when
+	 * saving/restoring MMR states
+	 */
+	[--sp] = (R7:0, P5:0);
 	[--sp] = fp;
 	[--sp] = usp;
 
@@ -523,43 +359,532 @@ ENTRY(_do_hibernate)
 	[--sp] = LB0;
 	[--sp] = LB1;
 
+	/* We can't push RETI directly as that'll change IPEND[4] */
+	r7 = RETI;
+	[--sp] = RETS;
 	[--sp] = ASTAT;
 	[--sp] = CYCLES;
 	[--sp] = CYCLES2;
-
-	[--sp] = RETS;
-	r0 = RETI;
-	[--sp] = r0;
+	[--sp] = SYSCFG;
 	[--sp] = RETX;
 	[--sp] = SEQSTAT;
+	[--sp] = r7;
+
+	/* Save first func arg in M3 */
+	M3 = R0;
+
+	/* Save system MMRs */
+	FP.H = hi(SYSMMR_BASE);
+	FP.L = lo(SYSMMR_BASE);
+
+#ifdef SIC_IMASK0
+	PM_SYS_PUSH(0, SIC_IMASK0)
+	PM_SYS_PUSH(1, SIC_IMASK1)
+# ifdef SIC_IMASK2
+	PM_SYS_PUSH(2, SIC_IMASK2)
+# endif
+#else
+	PM_SYS_PUSH(0, SIC_IMASK)
+#endif
+#ifdef SIC_IAR0
+	PM_SYS_PUSH(3, SIC_IAR0)
+	PM_SYS_PUSH(4, SIC_IAR1)
+	PM_SYS_PUSH(5, SIC_IAR2)
+#endif
+#ifdef SIC_IAR3
+	PM_SYS_PUSH(6, SIC_IAR3)
+#endif
+#ifdef SIC_IAR4
+	PM_SYS_PUSH(7, SIC_IAR4)
+	PM_SYS_PUSH(8, SIC_IAR5)
+	PM_SYS_PUSH(9, SIC_IAR6)
+#endif
+#ifdef SIC_IAR7
+	PM_SYS_PUSH(10, SIC_IAR7)
+#endif
+#ifdef SIC_IAR8
+	PM_SYS_PUSH(11, SIC_IAR8)
+	PM_SYS_PUSH(12, SIC_IAR9)
+	PM_SYS_PUSH(13, SIC_IAR10)
+#endif
+	PM_PUSH_SYNC(13)
+#ifdef SIC_IAR11
+	PM_SYS_PUSH(0, SIC_IAR11)
+#endif
+
+#ifdef SIC_IWR
+	PM_SYS_PUSH(1, SIC_IWR)
+#endif
+#ifdef SIC_IWR0
+	PM_SYS_PUSH(1, SIC_IWR0)
+#endif
+#ifdef SIC_IWR1
+	PM_SYS_PUSH(2, SIC_IWR1)
+#endif
+#ifdef SIC_IWR2
+	PM_SYS_PUSH(3, SIC_IWR2)
+#endif
+
+#ifdef PINT0_ASSIGN
+	PM_SYS_PUSH(4, PINT0_MASK_SET)
+	PM_SYS_PUSH(5, PINT1_MASK_SET)
+	PM_SYS_PUSH(6, PINT2_MASK_SET)
+	PM_SYS_PUSH(7, PINT3_MASK_SET)
+	PM_SYS_PUSH(8, PINT0_ASSIGN)
+	PM_SYS_PUSH(9, PINT1_ASSIGN)
+	PM_SYS_PUSH(10, PINT2_ASSIGN)
+	PM_SYS_PUSH(11, PINT3_ASSIGN)
+	PM_SYS_PUSH(12, PINT0_INVERT_SET)
+	PM_SYS_PUSH(13, PINT1_INVERT_SET)
+	PM_PUSH_SYNC(13)
+	PM_SYS_PUSH(0, PINT2_INVERT_SET)
+	PM_SYS_PUSH(1, PINT3_INVERT_SET)
+	PM_SYS_PUSH(2, PINT0_EDGE_SET)
+	PM_SYS_PUSH(3, PINT1_EDGE_SET)
+	PM_SYS_PUSH(4, PINT2_EDGE_SET)
+	PM_SYS_PUSH(5, PINT3_EDGE_SET)
+#endif
+
+	PM_SYS_PUSH16(6, SYSCR)
+
+	PM_SYS_PUSH16(7, EBIU_AMGCTL)
+	PM_SYS_PUSH(8, EBIU_AMBCTL0)
+	PM_SYS_PUSH(9, EBIU_AMBCTL1)
+#ifdef EBIU_FCTL
+	PM_SYS_PUSH(10, EBIU_MBSCTL)
+	PM_SYS_PUSH(11, EBIU_MODE)
+	PM_SYS_PUSH(12, EBIU_FCTL)
+	PM_PUSH_SYNC(12)
+#else
+	PM_PUSH_SYNC(9)
+#endif
+
+#ifdef PORTCIO_FER
+	/* 16bit loads can only be done with dregs */
+	PM_SYS_PUSH16(0, PORTCIO_DIR)
+	PM_SYS_PUSH16(1, PORTCIO_INEN)
+	PM_SYS_PUSH16(2, PORTCIO)
+	PM_SYS_PUSH16(3, PORTCIO_FER)
+	PM_SYS_PUSH16(4, PORTDIO_DIR)
+	PM_SYS_PUSH16(5, PORTDIO_INEN)
+	PM_SYS_PUSH16(6, PORTDIO)
+	PM_SYS_PUSH16(7, PORTDIO_FER)
+	PM_PUSH_SYNC(7)
+	PM_SYS_PUSH16(0, PORTEIO_DIR)
+	PM_SYS_PUSH16(1, PORTEIO_INEN)
+	PM_SYS_PUSH16(2, PORTEIO)
+	PM_SYS_PUSH16(3, PORTEIO_FER)
+	PM_PUSH_SYNC(3)
+#endif
+
+	/* Save Core MMRs */
+	I0.H = hi(COREMMR_BASE);
+	I0.L = lo(COREMMR_BASE);
+	I1 = I0;
+	I2 = I0;
+	I3 = I0;
+	B0 = I0;
+	B1 = I0;
+	B2 = I0;
+	B3 = I0;
+	I1.L = lo(DCPLB_ADDR0);
+	I2.L = lo(DCPLB_DATA0);
+	I3.L = lo(ICPLB_ADDR0);
+	B0.L = lo(ICPLB_DATA0);
+	B1.L = lo(EVT2);
+	B2.L = lo(IMASK);
+	B3.L = lo(TCNTL);
+
+	/* DCPLB Addr */
+	FP = I1;
+	PM_PUSH(0, DCPLB_ADDR0)
+	PM_PUSH(1, DCPLB_ADDR1)
+	PM_PUSH(2, DCPLB_ADDR2)
+	PM_PUSH(3, DCPLB_ADDR3)
+	PM_PUSH(4, DCPLB_ADDR4)
+	PM_PUSH(5, DCPLB_ADDR5)
+	PM_PUSH(6, DCPLB_ADDR6)
+	PM_PUSH(7, DCPLB_ADDR7)
+	PM_PUSH(8, DCPLB_ADDR8)
+	PM_PUSH(9, DCPLB_ADDR9)
+	PM_PUSH(10, DCPLB_ADDR10)
+	PM_PUSH(11, DCPLB_ADDR11)
+	PM_PUSH(12, DCPLB_ADDR12)
+	PM_PUSH(13, DCPLB_ADDR13)
+	PM_PUSH_SYNC(13)
+	PM_PUSH(0, DCPLB_ADDR14)
+	PM_PUSH(1, DCPLB_ADDR15)
+
+	/* DCPLB Data */
+	FP = I2;
+	PM_PUSH(2, DCPLB_DATA0)
+	PM_PUSH(3, DCPLB_DATA1)
+	PM_PUSH(4, DCPLB_DATA2)
+	PM_PUSH(5, DCPLB_DATA3)
+	PM_PUSH(6, DCPLB_DATA4)
+	PM_PUSH(7, DCPLB_DATA5)
+	PM_PUSH(8, DCPLB_DATA6)
+	PM_PUSH(9, DCPLB_DATA7)
+	PM_PUSH(10, DCPLB_DATA8)
+	PM_PUSH(11, DCPLB_DATA9)
+	PM_PUSH(12, DCPLB_DATA10)
+	PM_PUSH(13, DCPLB_DATA11)
+	PM_PUSH_SYNC(13)
+	PM_PUSH(0, DCPLB_DATA12)
+	PM_PUSH(1, DCPLB_DATA13)
+	PM_PUSH(2, DCPLB_DATA14)
+	PM_PUSH(3, DCPLB_DATA15)
+
+	/* ICPLB Addr */
+	FP = I3;
+	PM_PUSH(4, ICPLB_ADDR0)
+	PM_PUSH(5, ICPLB_ADDR1)
+	PM_PUSH(6, ICPLB_ADDR2)
+	PM_PUSH(7, ICPLB_ADDR3)
+	PM_PUSH(8, ICPLB_ADDR4)
+	PM_PUSH(9, ICPLB_ADDR5)
+	PM_PUSH(10, ICPLB_ADDR6)
+	PM_PUSH(11, ICPLB_ADDR7)
+	PM_PUSH(12, ICPLB_ADDR8)
+	PM_PUSH(13, ICPLB_ADDR9)
+	PM_PUSH_SYNC(13)
+	PM_PUSH(0, ICPLB_ADDR10)
+	PM_PUSH(1, ICPLB_ADDR11)
+	PM_PUSH(2, ICPLB_ADDR12)
+	PM_PUSH(3, ICPLB_ADDR13)
+	PM_PUSH(4, ICPLB_ADDR14)
+	PM_PUSH(5, ICPLB_ADDR15)
+
+	/* ICPLB Data */
+	FP = B0;
+	PM_PUSH(6, ICPLB_DATA0)
+	PM_PUSH(7, ICPLB_DATA1)
+	PM_PUSH(8, ICPLB_DATA2)
+	PM_PUSH(9, ICPLB_DATA3)
+	PM_PUSH(10, ICPLB_DATA4)
+	PM_PUSH(11, ICPLB_DATA5)
+	PM_PUSH(12, ICPLB_DATA6)
+	PM_PUSH(13, ICPLB_DATA7)
+	PM_PUSH_SYNC(13)
+	PM_PUSH(0, ICPLB_DATA8)
+	PM_PUSH(1, ICPLB_DATA9)
+	PM_PUSH(2, ICPLB_DATA10)
+	PM_PUSH(3, ICPLB_DATA11)
+	PM_PUSH(4, ICPLB_DATA12)
+	PM_PUSH(5, ICPLB_DATA13)
+	PM_PUSH(6, ICPLB_DATA14)
+	PM_PUSH(7, ICPLB_DATA15)
+
+	/* Event Vectors */
+	FP = B1;
+	PM_PUSH(8, EVT2)
+	PM_PUSH(9, EVT3)
+	FP += 4;	/* EVT4 */
+	PM_PUSH(10, EVT5)
+	PM_PUSH(11, EVT6)
+	PM_PUSH(12, EVT7)
+	PM_PUSH(13, EVT8)
+	PM_PUSH_SYNC(13)
+	PM_PUSH(0, EVT9)
+	PM_PUSH(1, EVT10)
+	PM_PUSH(2, EVT11)
+	PM_PUSH(3, EVT12)
+	PM_PUSH(4, EVT13)
+	PM_PUSH(5, EVT14)
+	PM_PUSH(6, EVT15)
+
+	/* CEC */
+	FP = B2;
+	PM_PUSH(7, IMASK)
+	FP += 4;	/* IPEND */
+	PM_PUSH(8, ILAT)
+	PM_PUSH(9, IPRIO)
+
+	/* Core Timer */
+	FP = B3;
+	PM_PUSH(10, TCNTL)
+	PM_PUSH(11, TPERIOD)
+	PM_PUSH(12, TSCALE)
+	PM_PUSH(13, TCOUNT)
+	PM_PUSH_SYNC(13)
+
+	/* Misc non-contiguous registers */
+	FP = I0;
+	PM_CORE_PUSH(0, DMEM_CONTROL);
+	PM_CORE_PUSH(1, IMEM_CONTROL);
+	PM_CORE_PUSH(2, TBUFCTL);
+	PM_PUSH_SYNC(2)
+
+	/* Setup args to hibernate mode early for pipeline optimization */
+	R0 = M3;
+	P1.H = _hibernate_mode;
+	P1.L = _hibernate_mode;
 
 	/* Save Magic, return address and Stack Pointer */
-	P0.H = 0;
-	P0.L = 0;
-	R0.H = 0xDEAD;	/* Hibernate Magic */
-	R0.L = 0xBEEF;
-	[P0++] = R0;	/* Store Hibernate Magic */
-	R0.H = .Lpm_resume_here;
-	R0.L = .Lpm_resume_here;
-	[P0++] = R0;	/* Save Return Address */
+	P0 = 0;
+	R1.H = 0xDEAD;	/* Hibernate Magic */
+	R1.L = 0xBEEF;
+	R2.H = .Lpm_resume_here;
+	R2.L = .Lpm_resume_here;
+	[P0++] = R1;	/* Store Hibernate Magic */
+	[P0++] = R2;	/* Save Return Address */
 	[P0++] = SP;	/* Save Stack Pointer */
-	P0.H = _hibernate_mode;
-	P0.L = _hibernate_mode;
-	R0 = R2;
-	call (P0); /* Goodbye */
+
+	/* Must use an indirect call as we need to jump to L1 */
+	call (P1); /* Goodbye */
 
 .Lpm_resume_here:
 
+	/* Restore Core MMRs */
+	I0.H = hi(COREMMR_BASE);
+	I0.L = lo(COREMMR_BASE);
+	I1 = I0;
+	I2 = I0;
+	I3 = I0;
+	B0 = I0;
+	B1 = I0;
+	B2 = I0;
+	B3 = I0;
+	I1.L = lo(DCPLB_ADDR15);
+	I2.L = lo(DCPLB_DATA15);
+	I3.L = lo(ICPLB_ADDR15);
+	B0.L = lo(ICPLB_DATA15);
+	B1.L = lo(EVT15);
+	B2.L = lo(IPRIO);
+	B3.L = lo(TCOUNT);
+
+	/* Misc non-contiguous registers */
+	FP = I0;
+	PM_POP_SYNC(2)
+	PM_CORE_POP(2, TBUFCTL)
+	PM_CORE_POP(1, IMEM_CONTROL)
+	PM_CORE_POP(0, DMEM_CONTROL)
+
+	/* Core Timer */
+	PM_POP_SYNC(13)
+	FP = B3;
+	PM_POP(13, TCOUNT)
+	PM_POP(12, TSCALE)
+	PM_POP(11, TPERIOD)
+	PM_POP(10, TCNTL)
+
+	/* CEC */
+	FP = B2;
+	PM_POP(9, IPRIO)
+	PM_POP(8, ILAT)
+	FP += -4;	/* IPEND */
+	PM_POP(7, IMASK)
+
+	/* Event Vectors */
+	FP = B1;
+	PM_POP(6, EVT15)
+	PM_POP(5, EVT14)
+	PM_POP(4, EVT13)
+	PM_POP(3, EVT12)
+	PM_POP(2, EVT11)
+	PM_POP(1, EVT10)
+	PM_POP(0, EVT9)
+	PM_POP_SYNC(13)
+	PM_POP(13, EVT8)
+	PM_POP(12, EVT7)
+	PM_POP(11, EVT6)
+	PM_POP(10, EVT5)
+	FP += -4;	/* EVT4 */
+	PM_POP(9, EVT3)
+	PM_POP(8, EVT2)
+
+	/* ICPLB Data */
+	FP = B0;
+	PM_POP(7, ICPLB_DATA15)
+	PM_POP(6, ICPLB_DATA14)
+	PM_POP(5, ICPLB_DATA13)
+	PM_POP(4, ICPLB_DATA12)
+	PM_POP(3, ICPLB_DATA11)
+	PM_POP(2, ICPLB_DATA10)
+	PM_POP(1, ICPLB_DATA9)
+	PM_POP(0, ICPLB_DATA8)
+	PM_POP_SYNC(13)
+	PM_POP(13, ICPLB_DATA7)
+	PM_POP(12, ICPLB_DATA6)
+	PM_POP(11, ICPLB_DATA5)
+	PM_POP(10, ICPLB_DATA4)
+	PM_POP(9, ICPLB_DATA3)
+	PM_POP(8, ICPLB_DATA2)
+	PM_POP(7, ICPLB_DATA1)
+	PM_POP(6, ICPLB_DATA0)
+
+	/* ICPLB Addr */
+	FP = I3;
+	PM_POP(5, ICPLB_ADDR15)
+	PM_POP(4, ICPLB_ADDR14)
+	PM_POP(3, ICPLB_ADDR13)
+	PM_POP(2, ICPLB_ADDR12)
+	PM_POP(1, ICPLB_ADDR11)
+	PM_POP(0, ICPLB_ADDR10)
+	PM_POP_SYNC(13)
+	PM_POP(13, ICPLB_ADDR9)
+	PM_POP(12, ICPLB_ADDR8)
+	PM_POP(11, ICPLB_ADDR7)
+	PM_POP(10, ICPLB_ADDR6)
+	PM_POP(9, ICPLB_ADDR5)
+	PM_POP(8, ICPLB_ADDR4)
+	PM_POP(7, ICPLB_ADDR3)
+	PM_POP(6, ICPLB_ADDR2)
+	PM_POP(5, ICPLB_ADDR1)
+	PM_POP(4, ICPLB_ADDR0)
+
+	/* DCPLB Data */
+	FP = I2;
+	PM_POP(3, DCPLB_DATA15)
+	PM_POP(2, DCPLB_DATA14)
+	PM_POP(1, DCPLB_DATA13)
+	PM_POP(0, DCPLB_DATA12)
+	PM_POP_SYNC(13)
+	PM_POP(13, DCPLB_DATA11)
+	PM_POP(12, DCPLB_DATA10)
+	PM_POP(11, DCPLB_DATA9)
+	PM_POP(10, DCPLB_DATA8)
+	PM_POP(9, DCPLB_DATA7)
+	PM_POP(8, DCPLB_DATA6)
+	PM_POP(7, DCPLB_DATA5)
+	PM_POP(6, DCPLB_DATA4)
+	PM_POP(5, DCPLB_DATA3)
+	PM_POP(4, DCPLB_DATA2)
+	PM_POP(3, DCPLB_DATA1)
+	PM_POP(2, DCPLB_DATA0)
+
+	/* DCPLB Addr */
+	FP = I1;
+	PM_POP(1, DCPLB_ADDR15)
+	PM_POP(0, DCPLB_ADDR14)
+	PM_POP_SYNC(13)
+	PM_POP(13, DCPLB_ADDR13)
+	PM_POP(12, DCPLB_ADDR12)
+	PM_POP(11, DCPLB_ADDR11)
+	PM_POP(10, DCPLB_ADDR10)
+	PM_POP(9, DCPLB_ADDR9)
+	PM_POP(8, DCPLB_ADDR8)
+	PM_POP(7, DCPLB_ADDR7)
+	PM_POP(6, DCPLB_ADDR6)
+	PM_POP(5, DCPLB_ADDR5)
+	PM_POP(4, DCPLB_ADDR4)
+	PM_POP(3, DCPLB_ADDR3)
+	PM_POP(2, DCPLB_ADDR2)
+	PM_POP(1, DCPLB_ADDR1)
+	PM_POP(0, DCPLB_ADDR0)
+
+	/* Restore System MMRs */
+	FP.H = hi(SYSMMR_BASE);
+	FP.L = lo(SYSMMR_BASE);
+
+#ifdef PORTCIO_FER
+	PM_POP_SYNC(3)
+	PM_SYS_POP16(3, PORTEIO_FER)
+	PM_SYS_POP16(2, PORTEIO)
+	PM_SYS_POP16(1, PORTEIO_INEN)
+	PM_SYS_POP16(0, PORTEIO_DIR)
+	PM_POP_SYNC(7)
+	PM_SYS_POP16(7, PORTDIO_FER)
+	PM_SYS_POP16(6, PORTDIO)
+	PM_SYS_POP16(5, PORTDIO_INEN)
+	PM_SYS_POP16(4, PORTDIO_DIR)
+	PM_SYS_POP16(3, PORTCIO_FER)
+	PM_SYS_POP16(2, PORTCIO)
+	PM_SYS_POP16(1, PORTCIO_INEN)
+	PM_SYS_POP16(0, PORTCIO_DIR)
+#endif
+
+#ifdef EBIU_FCTL
+	PM_POP_SYNC(12)
+	PM_SYS_POP(12, EBIU_FCTL)
+	PM_SYS_POP(11, EBIU_MODE)
+	PM_SYS_POP(10, EBIU_MBSCTL)
+#else
+	PM_POP_SYNC(9)
+#endif
+	PM_SYS_POP(9, EBIU_AMBCTL1)
+	PM_SYS_POP(8, EBIU_AMBCTL0)
+	PM_SYS_POP16(7, EBIU_AMGCTL)
+
+	PM_SYS_POP16(6, SYSCR)
+
+#ifdef PINT0_ASSIGN
+	PM_SYS_POP(5, PINT3_EDGE_SET)
+	PM_SYS_POP(4, PINT2_EDGE_SET)
+	PM_SYS_POP(3, PINT1_EDGE_SET)
+	PM_SYS_POP(2, PINT0_EDGE_SET)
+	PM_SYS_POP(1, PINT3_INVERT_SET)
+	PM_SYS_POP(0, PINT2_INVERT_SET)
+	PM_POP_SYNC(13)
+	PM_SYS_POP(13, PINT1_INVERT_SET)
+	PM_SYS_POP(12, PINT0_INVERT_SET)
+	PM_SYS_POP(11, PINT3_ASSIGN)
+	PM_SYS_POP(10, PINT2_ASSIGN)
+	PM_SYS_POP(9, PINT1_ASSIGN)
+	PM_SYS_POP(8, PINT0_ASSIGN)
+	PM_SYS_POP(7, PINT3_MASK_SET)
+	PM_SYS_POP(6, PINT2_MASK_SET)
+	PM_SYS_POP(5, PINT1_MASK_SET)
+	PM_SYS_POP(4, PINT0_MASK_SET)
+#endif
+
+#ifdef SIC_IWR2
+	PM_SYS_POP(3, SIC_IWR2)
+#endif
+#ifdef SIC_IWR1
+	PM_SYS_POP(2, SIC_IWR1)
+#endif
+#ifdef SIC_IWR0
+	PM_SYS_POP(1, SIC_IWR0)
+#endif
+#ifdef SIC_IWR
+	PM_SYS_POP(1, SIC_IWR)
+#endif
+
+#ifdef SIC_IAR11
+	PM_SYS_POP(0, SIC_IAR11)
+#endif
+	PM_POP_SYNC(13)
+#ifdef SIC_IAR8
+	PM_SYS_POP(13, SIC_IAR10)
+	PM_SYS_POP(12, SIC_IAR9)
+	PM_SYS_POP(11, SIC_IAR8)
+#endif
+#ifdef SIC_IAR7
+	PM_SYS_POP(10, SIC_IAR7)
+#endif
+#ifdef SIC_IAR6
+	PM_SYS_POP(9, SIC_IAR6)
+	PM_SYS_POP(8, SIC_IAR5)
+	PM_SYS_POP(7, SIC_IAR4)
+#endif
+#ifdef SIC_IAR3
+	PM_SYS_POP(6, SIC_IAR3)
+#endif
+#ifdef SIC_IAR0
+	PM_SYS_POP(5, SIC_IAR2)
+	PM_SYS_POP(4, SIC_IAR1)
+	PM_SYS_POP(3, SIC_IAR0)
+#endif
+#ifdef SIC_IMASK0
+# ifdef SIC_IMASK2
+	PM_SYS_POP(2, SIC_IMASK2)
+# endif
+	PM_SYS_POP(1, SIC_IMASK1)
+	PM_SYS_POP(0, SIC_IMASK0)
+#else
+	PM_SYS_POP(0, SIC_IMASK)
+#endif
+
 	/* Restore Core Registers */
+	RETI = [sp++];
 	SEQSTAT = [sp++];
 	RETX = [sp++];
-	r0 = [sp++];
-	RETI = r0;
-	RETS = [sp++];
-
+	SYSCFG = [sp++];
 	CYCLES2 = [sp++];
 	CYCLES = [sp++];
 	ASTAT = [sp++];
+	RETS = [sp++];
 
 	LB1 = [sp++];
 	LB0 = [sp++];
@@ -594,201 +919,10 @@ ENTRY(_do_hibernate)
 
 	usp = [sp++];
 	fp = [sp++];
-
-	( R7 : 0, P5 : 0) = [ SP ++ ];
-	SYSCFG = [sp++];
-
-	/* Restore Core MMRs */
-
-	PM_POP(TBUFCTL)
-	PM_POP(TCOUNT)
-	PM_POP(TSCALE)
-	PM_POP(TPERIOD)
-	PM_POP(TCNTL)
-	PM_POP(IPRIO)
-	PM_POP(ILAT)
-	PM_POP(IMASK)
-	PM_POP(EVT15)
-	PM_POP(EVT14)
-	PM_POP(EVT13)
-	PM_POP(EVT12)
-	PM_POP(EVT11)
-	PM_POP(EVT10)
-	PM_POP(EVT9)
-	PM_POP(EVT8)
-	PM_POP(EVT7)
-	PM_POP(EVT6)
-	PM_POP(EVT5)
-	PM_POP(EVT3)
-	PM_POP(EVT2)
-	PM_POP(ICPLB_DATA15)
-	PM_POP(ICPLB_DATA14)
-	PM_POP(ICPLB_DATA13)
-	PM_POP(ICPLB_DATA12)
-	PM_POP(ICPLB_DATA11)
-	PM_POP(ICPLB_DATA10)
-	PM_POP(ICPLB_DATA9)
-	PM_POP(ICPLB_DATA8)
-	PM_POP(ICPLB_DATA7)
-	PM_POP(ICPLB_DATA6)
-	PM_POP(ICPLB_DATA5)
-	PM_POP(ICPLB_DATA4)
-	PM_POP(ICPLB_DATA3)
-	PM_POP(ICPLB_DATA2)
-	PM_POP(ICPLB_DATA1)
-	PM_POP(ICPLB_DATA0)
-	PM_POP(ICPLB_ADDR15)
-	PM_POP(ICPLB_ADDR14)
-	PM_POP(ICPLB_ADDR13)
-	PM_POP(ICPLB_ADDR12)
-	PM_POP(ICPLB_ADDR11)
-	PM_POP(ICPLB_ADDR10)
-	PM_POP(ICPLB_ADDR9)
-	PM_POP(ICPLB_ADDR8)
-	PM_POP(ICPLB_ADDR7)
-	PM_POP(ICPLB_ADDR6)
-	PM_POP(ICPLB_ADDR5)
-	PM_POP(ICPLB_ADDR4)
-	PM_POP(ICPLB_ADDR3)
-	PM_POP(ICPLB_ADDR2)
-	PM_POP(ICPLB_ADDR1)
-	PM_POP(ICPLB_ADDR0)
-	PM_POP(IMEM_CONTROL)
-	PM_POP(DCPLB_DATA15)
-	PM_POP(DCPLB_DATA14)
-	PM_POP(DCPLB_DATA13)
-	PM_POP(DCPLB_DATA12)
-	PM_POP(DCPLB_DATA11)
-	PM_POP(DCPLB_DATA10)
-	PM_POP(DCPLB_DATA9)
-	PM_POP(DCPLB_DATA8)
-	PM_POP(DCPLB_DATA7)
-	PM_POP(DCPLB_DATA6)
-	PM_POP(DCPLB_DATA5)
-	PM_POP(DCPLB_DATA4)
-	PM_POP(DCPLB_DATA3)
-	PM_POP(DCPLB_DATA2)
-	PM_POP(DCPLB_DATA1)
-	PM_POP(DCPLB_DATA0)
-	PM_POP(DCPLB_ADDR15)
-	PM_POP(DCPLB_ADDR14)
-	PM_POP(DCPLB_ADDR13)
-	PM_POP(DCPLB_ADDR12)
-	PM_POP(DCPLB_ADDR11)
-	PM_POP(DCPLB_ADDR10)
-	PM_POP(DCPLB_ADDR9)
-	PM_POP(DCPLB_ADDR8)
-	PM_POP(DCPLB_ADDR7)
-	PM_POP(DCPLB_ADDR6)
-	PM_POP(DCPLB_ADDR5)
-	PM_POP(DCPLB_ADDR4)
-	PM_POP(DCPLB_ADDR3)
-	PM_POP(DCPLB_ADDR2)
-	PM_POP(DCPLB_ADDR1)
-	PM_POP(DCPLB_ADDR0)
-	PM_POP(DMEM_CONTROL)
-
-	/* Restore System MMRs */
-
-	P0.H = hi(PLL_CTL);
-	P0.L = lo(PLL_CTL);
-	PM_SYS_POP16(SYSCR)
-
-#ifdef PORTCIO_FER
-	PM_SYS_POP16(PORTEIO_FER)
-	PM_SYS_POP16(PORTEIO)
-	PM_SYS_POP16(PORTEIO_INEN)
-	PM_SYS_POP16(PORTEIO_DIR)
-	PM_SYS_POP16(PORTDIO_FER)
-	PM_SYS_POP16(PORTDIO)
-	PM_SYS_POP16(PORTDIO_INEN)
-	PM_SYS_POP16(PORTDIO_DIR)
-	PM_SYS_POP16(PORTCIO_FER)
-	PM_SYS_POP16(PORTCIO)
-	PM_SYS_POP16(PORTCIO_INEN)
-	PM_SYS_POP16(PORTCIO_DIR)
-#endif
-
-#ifdef EBIU_FCTL
-	PM_SYS_POP(EBIU_FCTL)
-	PM_SYS_POP(EBIU_MODE)
-	PM_SYS_POP(EBIU_MBSCTL)
-#endif
-	PM_SYS_POP16(EBIU_AMGCTL)
-	PM_SYS_POP(EBIU_AMBCTL1)
-	PM_SYS_POP(EBIU_AMBCTL0)
-
-#ifdef PINT0_ASSIGN
-	PM_SYS_POP(PINT3_EDGE_SET)
-	PM_SYS_POP(PINT2_EDGE_SET)
-	PM_SYS_POP(PINT1_EDGE_SET)
-	PM_SYS_POP(PINT0_EDGE_SET)
-	PM_SYS_POP(PINT3_INVERT_SET)
-	PM_SYS_POP(PINT2_INVERT_SET)
-	PM_SYS_POP(PINT1_INVERT_SET)
-	PM_SYS_POP(PINT0_INVERT_SET)
-	PM_SYS_POP(PINT3_ASSIGN)
-	PM_SYS_POP(PINT2_ASSIGN)
-	PM_SYS_POP(PINT1_ASSIGN)
-	PM_SYS_POP(PINT0_ASSIGN)
-	PM_SYS_POP(PINT3_MASK_SET)
-	PM_SYS_POP(PINT2_MASK_SET)
-	PM_SYS_POP(PINT1_MASK_SET)
-	PM_SYS_POP(PINT0_MASK_SET)
-#endif
-
-#ifdef SIC_IWR2
-	PM_SYS_POP(SIC_IWR2)
-#endif
-#ifdef SIC_IWR1
-	PM_SYS_POP(SIC_IWR1)
-#endif
-#ifdef SIC_IWR0
-	PM_SYS_POP(SIC_IWR0)
-#endif
-#ifdef SIC_IWR
-	PM_SYS_POP(SIC_IWR)
-#endif
-
-#ifdef SIC_IAR8
-	PM_SYS_POP(SIC_IAR11)
-	PM_SYS_POP(SIC_IAR10)
-	PM_SYS_POP(SIC_IAR9)
-	PM_SYS_POP(SIC_IAR8)
-#endif
-#ifdef SIC_IAR7
-	PM_SYS_POP(SIC_IAR7)
-#endif
-#ifdef SIC_IAR6
-	PM_SYS_POP(SIC_IAR6)
-	PM_SYS_POP(SIC_IAR5)
-	PM_SYS_POP(SIC_IAR4)
-#endif
-#ifdef SIC_IAR3
-	PM_SYS_POP(SIC_IAR3)
-#endif
-#ifdef SIC_IAR0
-	PM_SYS_POP(SIC_IAR2)
-	PM_SYS_POP(SIC_IAR1)
-	PM_SYS_POP(SIC_IAR0)
-#endif
-#ifdef SIC_IMASK
-	PM_SYS_POP(SIC_IMASK)
-#endif
-#ifdef SIC_IMASK2
-	PM_SYS_POP(SIC_IMASK2)
-#endif
-#ifdef SIC_IMASK1
-	PM_SYS_POP(SIC_IMASK1)
-#endif
-#ifdef SIC_IMASK0
-	PM_SYS_POP(SIC_IMASK0)
-#endif
+	(R7:0, P5:0) = [sp++];
 
 	[--sp] = RETI;	/* Clear Global Interrupt Disable */
 	SP += 4;
 
-	RETS = [SP++];
-	( R7:0, P5:0 ) = [SP++];
 	RTS;
 ENDPROC(_do_hibernate)