|
@@ -39,12 +39,14 @@
|
|
#ifdef USE_DOUBLE
|
|
#ifdef USE_DOUBLE
|
|
|
|
|
|
#define LOAD ld
|
|
#define LOAD ld
|
|
|
|
+#define LOAD32 lwu
|
|
#define ADD daddu
|
|
#define ADD daddu
|
|
#define NBYTES 8
|
|
#define NBYTES 8
|
|
|
|
|
|
#else
|
|
#else
|
|
|
|
|
|
#define LOAD lw
|
|
#define LOAD lw
|
|
|
|
+#define LOAD32 lw
|
|
#define ADD addu
|
|
#define ADD addu
|
|
#define NBYTES 4
|
|
#define NBYTES 4
|
|
|
|
|
|
@@ -60,6 +62,14 @@
|
|
ADD sum, v1; \
|
|
ADD sum, v1; \
|
|
.set pop
|
|
.set pop
|
|
|
|
|
|
|
|
+#define ADDC32(sum,reg) /* sum += reg using 32-bit addu, then add the carry back in; overwrites v1 */ \
|
|
|
|
+ .set push; /* save current assembler options */ \
|
|
|
|
+ .set noat; /* no implicit assembler use of $at inside this sequence */ \
|
|
|
|
+ addu sum, reg; /* 32-bit add; never traps on overflow */ \
|
|
|
|
+ sltu v1, sum, reg; /* v1 = (sum < reg) unsigned, i.e. 1 when the add carried out */ \
|
|
|
|
+ addu sum, v1; /* fold that carry back into sum */ \
|
|
|
|
+ .set pop /* restore the options saved by .set push */
|
|
|
|
+
|
|
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
|
|
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
|
|
LOAD _t0, (offset + UNIT(0))(src); \
|
|
LOAD _t0, (offset + UNIT(0))(src); \
|
|
LOAD _t1, (offset + UNIT(1))(src); \
|
|
LOAD _t1, (offset + UNIT(1))(src); \
|
|
@@ -132,7 +142,7 @@ LEAF(csum_partial)
|
|
beqz t8, .Lqword_align
|
|
beqz t8, .Lqword_align
|
|
andi t8, src, 0x8
|
|
andi t8, src, 0x8
|
|
|
|
|
|
- lw t0, 0x00(src)
|
|
|
|
|
|
+ LOAD32 t0, 0x00(src)
|
|
LONG_SUBU a1, a1, 0x4
|
|
LONG_SUBU a1, a1, 0x4
|
|
ADDC(sum, t0)
|
|
ADDC(sum, t0)
|
|
PTR_ADDU src, src, 0x4
|
|
PTR_ADDU src, src, 0x4
|
|
@@ -211,7 +221,7 @@ LEAF(csum_partial)
|
|
LONG_SRL t8, t8, 0x2
|
|
LONG_SRL t8, t8, 0x2
|
|
|
|
|
|
.Lend_words:
|
|
.Lend_words:
|
|
- lw t0, (src)
|
|
|
|
|
|
+ LOAD32 t0, (src)
|
|
LONG_SUBU t8, t8, 0x1
|
|
LONG_SUBU t8, t8, 0x1
|
|
ADDC(sum, t0)
|
|
ADDC(sum, t0)
|
|
.set reorder /* DADDI_WAR */
|
|
.set reorder /* DADDI_WAR */
|
|
@@ -230,6 +240,9 @@ LEAF(csum_partial)
|
|
/* Still a full word to go */
|
|
/* Still a full word to go */
|
|
ulw t1, (src)
|
|
ulw t1, (src)
|
|
PTR_ADDIU src, 4
|
|
PTR_ADDIU src, 4
|
|
|
|
+#ifdef USE_DOUBLE
|
|
|
|
+ dsll t1, t1, 32 /* clear lower 32bit */
|
|
|
|
+#endif
|
|
ADDC(sum, t1)
|
|
ADDC(sum, t1)
|
|
|
|
|
|
1: move t1, zero
|
|
1: move t1, zero
|
|
@@ -280,7 +293,7 @@ LEAF(csum_partial)
|
|
1:
|
|
1:
|
|
.set reorder
|
|
.set reorder
|
|
/* Add the passed partial csum. */
|
|
/* Add the passed partial csum. */
|
|
- ADDC(sum, a2)
|
|
|
|
|
|
+ ADDC32(sum, a2)
|
|
jr ra
|
|
jr ra
|
|
.set noreorder
|
|
.set noreorder
|
|
END(csum_partial)
|
|
END(csum_partial)
|
|
@@ -681,7 +694,7 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc)
|
|
.set pop
|
|
.set pop
|
|
1:
|
|
1:
|
|
.set reorder
|
|
.set reorder
|
|
- ADDC(sum, psum)
|
|
|
|
|
|
+ ADDC32(sum, psum)
|
|
jr ra
|
|
jr ra
|
|
.set noreorder
|
|
.set noreorder
|
|
|
|
|