|
@@ -362,10 +362,10 @@ $unaligned:
|
|
|
|
|
|
extql t2, a1, t2 # U :
|
|
extql t2, a1, t2 # U :
|
|
cmpbge zero, t1, t8 # E : is there a zero?
|
|
cmpbge zero, t1, t8 # E : is there a zero?
|
|
- andnot t2, t6, t12 # E : dest mask for a single word copy
|
|
|
|
|
|
+ andnot t2, t6, t2 # E : dest mask for a single word copy
|
|
or t8, t10, t5 # E : test for end-of-count too
|
|
or t8, t10, t5 # E : test for end-of-count too
|
|
|
|
|
|
- cmpbge zero, t12, t3 # E :
|
|
|
|
|
|
+ cmpbge zero, t2, t3 # E :
|
|
cmoveq a2, t5, t8 # E : Latency=2, extra map slot
|
|
cmoveq a2, t5, t8 # E : Latency=2, extra map slot
|
|
nop # E : keep with cmoveq
|
|
nop # E : keep with cmoveq
|
|
andnot t8, t3, t8 # E : (stall)
|
|
andnot t8, t3, t8 # E : (stall)
|
|
@@ -379,13 +379,13 @@ $unaligned:
|
|
negq t8, t6 # E : build bitmask of bytes <= zero
|
|
negq t8, t6 # E : build bitmask of bytes <= zero
|
|
mskqh t1, t4, t1 # U :
|
|
mskqh t1, t4, t1 # U :
|
|
|
|
|
|
- and t6, t8, t2 # E :
|
|
|
|
- subq t2, 1, t6 # E : (stall)
|
|
|
|
- or t6, t2, t8 # E : (stall)
|
|
|
|
- zapnot t12, t8, t12 # U : prepare source word; mirror changes (stall)
|
|
|
|
|
|
+ and t6, t8, t12 # E :
|
|
|
|
+ subq t12, 1, t6 # E : (stall)
|
|
|
|
+ or t6, t12, t8 # E : (stall)
|
|
|
|
+ zapnot t2, t8, t2 # U : prepare source word; mirror changes (stall)
|
|
|
|
|
|
zapnot t1, t8, t1 # U : to source validity mask
|
|
zapnot t1, t8, t1 # U : to source validity mask
|
|
- andnot t0, t12, t0 # E : zero place for source to reside
|
|
|
|
|
|
+ andnot t0, t2, t0 # E : zero place for source to reside
|
|
or t0, t1, t0 # E : and put it there (stall both t0, t1)
|
|
or t0, t1, t0 # E : and put it there (stall both t0, t1)
|
|
stq_u t0, 0(a0) # L : (stall)
|
|
stq_u t0, 0(a0) # L : (stall)
|
|
|
|
|